"""Gradio chat UI for DeepSeek models served via the Hugging Face hub."""

import gradio as gr
from functools import lru_cache


# Cache model loading so repeated calls with the same name reuse one client.
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    """Load a DeepSeek chat endpoint from the Hugging Face hub via gradio.

    Parameters
    ----------
    model_name : str
        Repository name under the ``deepseek-ai`` organization.
    """
    return gr.load(
        name=f"deepseek-ai/{model_name}",
        src="huggingface",
        api_name="/chat",
    )


# Load all models eagerly at startup so the first request has no cold start.
MODELS = {
    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero"),
}


def parse_response(response):
    """Extract the assistant text from the formats different models return.

    Handles three shapes: a non-empty list of message dicts, an OpenAI-style
    dict with ``choices``, and a plain ``generated_text`` dict.  Anything
    else — including an empty list — yields a diagnostic string instead of
    raising, so the error surfaces in the chat window.
    """
    if isinstance(response, list):
        # Empty list deliberately falls through to the diagnostic below.
        if response:
            first = response[0]
            return first.get("generated_text", first.get("content", str(first)))
    elif isinstance(response, dict):
        if "choices" in response:
            # OpenAI-style chat-completions format.
            return response["choices"][0]["message"]["content"]
        if "generated_text" in response:
            # Standard HF text-generation format.
            return response["generated_text"]
    return f"Unsupported response format: {type(response)}"


def chatbot(input_text, history, model_choice, system_message,
            max_new_tokens, temperature, top_p):
    """Handle one chat turn: query the chosen model and append to history.

    Returns ``(history, history, "")`` — the chatbot display value, the
    updated state, and an empty string that clears the input textbox.
    """
    history = history or []
    model_component = MODELS[model_choice]

    # Construct messages with an optional system message.  Guard against a
    # None system_message so .strip() cannot raise AttributeError.
    messages = []
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": input_text})

    payload = {
        "messages": messages,
        "max_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    try:
        # NOTE(review): this assumes the object returned by gr.load() is
        # directly callable with a payload dict — confirm against the gradio
        # version in use; recent gradio versions expose loaded endpoints
        # through named API functions instead.
        response = model_component(payload)
        assistant_response = parse_response(response)
    except Exception as e:
        # UI boundary: surface any backend failure in the chat window
        # rather than crashing the app.
        assistant_response = f"Error: {str(e)}"

    history.append((input_text, assistant_response))
    return history, history, ""


# Interface setup.
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown("""# DeepSeek Chatbot""")
    with gr.Row():
        with gr.Column():
            chatbot_output = gr.Chatbot(height=500)
            # Fixed: the placeholder literal was split across two physical
            # lines in the original source (a syntax error as pasted).
            msg = gr.Textbox(placeholder="Type your message...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.ClearButton([msg, chatbot_output])
        with gr.Row():
            with gr.Accordion("Options", open=True):
                model_choice = gr.Radio(
                    choices=list(MODELS.keys()),
                    value="DeepSeek-R1",
                )
            with gr.Accordion("Optional Parameters", open=False):
                system_message = gr.Textbox(
                    value="You are a helpful AI assistant",
                    lines=2,
                )
                max_new_tokens = gr.Slider(1, 4000, 200)
                temperature = gr.Slider(0.1, 4.0, 0.7)
                top_p = gr.Slider(0.1, 1.0, 0.9)

    chat_history = gr.State([])

    # Both the button click and Enter in the textbox trigger the same handler.
    submit_btn.click(
        chatbot,
        [msg, chat_history, model_choice, system_message,
         max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg],
    )
    msg.submit(
        chatbot,
        [msg, chat_history, model_choice, system_message,
         max_new_tokens, temperature, top_p],
        [chatbot_output, chat_history, msg],
    )


if __name__ == "__main__":
    demo.launch()