import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Function to generate responses
def generate_response(prompt):
    # Move inputs to whatever device device_map placed the model on,
    # rather than hardcoding "cuda" (which fails on CPU-only machines).
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# Create a Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Enter your prompt"),
    outputs=gr.Textbox(label="Generated Response"),
    title="DeepSeek Coder Chatbot",
    description="A chatbot powered by DeepSeek Coder 1.3B",
)

# launch() serves the UI locally (by default at http://127.0.0.1:7860);
# pass share=True for a temporary public link.
iface.launch()
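
# Optional variant: deepseek-coder-1.3b-instruct is instruction-tuned, so it
# usually responds better when the prompt is wrapped in its chat template.
# A minimal sketch, assuming the tokenizer ships a chat template (recent
# transformers versions expose apply_chat_template); to try it, define this
# above gr.Interface and pass it as fn instead of generate_response:
#
#     def generate_chat_response(prompt):
#         messages = [{"role": "user", "content": prompt}]
#         input_ids = tokenizer.apply_chat_template(
#             messages, add_generation_prompt=True, return_tensors="pt"
#         ).to(model.device)
#         outputs = model.generate(
#             input_ids,
#             max_new_tokens=200,
#             pad_token_id=tokenizer.eos_token_id,
#         )
#         # Decode only the tokens generated after the templated prompt.
#         return tokenizer.decode(
#             outputs[0][input_ids.shape[1]:], skip_special_tokens=True
#         )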