import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

model_id = "deepseek-ai/deepseek-coder-7b-base"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Create pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Memory to store chat history
chat_history = []

# Format prompt with history
def format_prompt(history, user_input):
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {user_input}\nAssistant:"
    return prompt

# Chat function
def chat(user_input):
    global chat_history
    prompt = format_prompt(chat_history, user_input)
    output = pipe(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)[0]["generated_text"]
    # The pipeline returns the prompt plus the completion, so slice off the
    # prompt, then cut at the first "User:" in case the model starts
    # hallucinating the next turn of the conversation
    assistant_response = output[len(prompt):].split("User:")[0].strip()
    # Add to history
    chat_history.append((user_input, assistant_response))
    # Build full conversation display
    chat_display = ""
    for user, bot in chat_history:
        chat_display += f"🧑‍💻 User: {user}\n🤖 Assistant: {bot}\n\n"
    return chat_display.strip()

# Reset chat
def reset_chat():
    global chat_history
    chat_history = []
    return ""

# Gradio UI
with gr.Blocks(title="🧠 DeepSeek 7B Chat with Memory") as demo:
    gr.Markdown("## 🤖 DeepSeek Coder 7B\nChat with memory. Ask coding questions or continue a conversation.")
    chatbot = gr.Textbox(lines=20, interactive=False, label="Chat History")
    msg = gr.Textbox(label="Type your message here", placeholder="What can I help you with today?")
    send_btn = gr.Button("Send")
    clear_btn = gr.Button("🧹 Clear Chat")

    send_btn.click(chat, inputs=msg, outputs=chatbot)
    clear_btn.click(reset_chat, outputs=chatbot)

demo.launch()
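
One limitation of the script as written: chat_history grows without bound, so after enough turns the prompt will overflow the model's context window. A minimal sketch of one way to cap it, as a drop-in replacement for format_prompt. MAX_TURNS is a hypothetical constant introduced here, not part of the original script:

MAX_TURNS = 8  # hypothetical cap on remembered turns; tune to your context budget

def format_prompt(history, user_input):
    # Keep only the most recent turns so the prompt stays inside the context window
    prompt = ""
    for user, bot in history[-MAX_TURNS:]:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {user_input}\nAssistant:"
    return prompt

A fixed turn count is the simplest policy; a token-budget cut using the tokenizer would be more precise but needs an extra encoding pass per turn.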
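
If you also want the Enter key to send a message, Gradio's Textbox exposes a submit event with the same signature as Button.click. One extra line inside the gr.Blocks context wires it up:

    msg.submit(chat, inputs=msg, outputs=chatbot)  # fire on Enter as well as the Send button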
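
Loading a 7B model in float16 needs roughly 14 GB of VRAM. If that is too much for your GPU, transformers can load the weights in 4-bit instead; a sketch, assuming the bitsandbytes package is installed and a CUDA device is available:

from transformers import BitsAndBytesConfig

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # 4-bit weights via bitsandbytes
    device_map="auto",
)

Expect some quality loss versus float16; for a demo like this it is usually an acceptable trade.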