import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

model_id = "deepseek-ai/deepseek-coder-7b-base"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Create pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Memory to store chat history
chat_history = []

# Format prompt with history
def format_prompt(history, user_input):
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {user_input}\nAssistant:"
    return prompt

# Chat function
def chat(user_input):
    global chat_history
    prompt = format_prompt(chat_history, user_input)
    output = pipe(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)[0]["generated_text"]
    # The pipeline returns the prompt plus the completion, so slice off the
    # prompt, then cut at the first "User:" in case the model starts
    # hallucinating the next turn of the conversation
    assistant_response = output[len(prompt):].split("User:")[0].strip()
    # Add to history
    chat_history.append((user_input, assistant_response))
    # Build full conversation display
    chat_display = ""
    for user, bot in chat_history:
        chat_display += f"🧑‍💻 User: {user}\n🤖 Assistant: {bot}\n\n"
    return chat_display.strip()

# Reset chat
def reset_chat():
    global chat_history
    chat_history = []
    return ""

# Gradio UI
with gr.Blocks(title="🧠 DeepSeek 7B Chat with Memory") as demo:
    gr.Markdown("## 🤖 DeepSeek Coder 7B\nChat with memory. Ask coding questions or continue a conversation.")
    chatbot = gr.Textbox(lines=20, interactive=False, label="Chat History")
    msg = gr.Textbox(label="Type your message here", placeholder="What can I help you with today?")
    send_btn = gr.Button("Send")
    clear_btn = gr.Button("🧹 Clear Chat")

    send_btn.click(chat, inputs=msg, outputs=chatbot)
    clear_btn.click(reset_chat, outputs=chatbot)

demo.launch()
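
One limitation of the script as written: chat_history grows without bound, so after enough turns the prompt will overflow the model's context window. A minimal sketch of one way to cap it, as a drop-in replacement for format_prompt. MAX_TURNS is a hypothetical constant introduced here, not part of the original script:

MAX_TURNS = 8  # hypothetical cap on remembered turns; tune to your context budget

def format_prompt(history, user_input):
    # Keep only the most recent turns so the prompt stays inside the context window
    prompt = ""
    for user, bot in history[-MAX_TURNS:]:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {user_input}\nAssistant:"
    return prompt

A fixed turn count is the simplest policy; a token-budget cut using the tokenizer would be more precise but needs an extra encoding pass per turn.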
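
If you also want the Enter key to send a message, Gradio's Textbox exposes a submit event with the same signature as Button.click. One extra line inside the gr.Blocks context wires it up:

    msg.submit(chat, inputs=msg, outputs=chatbot)  # fire on Enter as well as the Send button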
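
Loading a 7B model in float16 needs roughly 14 GB of VRAM. If that is too much for your GPU, transformers can load the weights in 4-bit instead; a sketch, assuming the bitsandbytes package is installed and a CUDA device is available:

from transformers import BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # 4-bit weights via bitsandbytes
    device_map="auto",
)

Expect some quality loss versus float16; for a demo like this it is usually an acceptable trade.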