# app.py -- commit 225af82 (verified), "Create app.py", uploaded by AndyDufrense.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load the tokenizer and model once at import time so every request reuses them.
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Use half precision only when a CUDA device is available. With
# device_map="auto" the model may be placed on the CPU, where float16
# support is limited, so fall back to float32 in that case.
MODEL_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=MODEL_DTYPE,
    device_map="auto",
)
# Function to generate responses
def generate_response(prompt):
    """Generate text for ``prompt`` using the module-level model and tokenizer.

    Args:
        prompt: The text entered by the user.

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped, or an empty string when ``prompt`` is empty or contains only
        whitespace. At most 200 new tokens are generated.
    """
    # Nothing to condition on: skip tokenization and generation entirely.
    if not prompt or not prompt.strip():
        return ""

    # The model is loaded with device_map="auto", so it is not guaranteed to
    # live on a GPU. Send the inputs to wherever the model actually is instead
    # of hard-coding "cuda", which raises on CPU-only hosts.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        # Reuse the EOS token as the padding token during generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Build the Gradio UI: one text input wired to generate_response, one text output.
prompt_box = gr.Textbox(label="Enter your prompt")
response_box = gr.Textbox(label="Generated Response")

iface = gr.Interface(
    title="DeepSeek Coder Chatbot",
    description="A chatbot powered by DeepSeek Coder 1.3B",
    fn=generate_response,
    inputs=prompt_box,
    outputs=response_box,
)

# Start serving the interface.
iface.launch()