# deepseek / app.py
# Uploaded by cheberle, commit 24e4297 ("f"), 1.18 kB
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
# Hub repository ids: the fine-tuned checkpoint that is actually loaded below,
# and the base model name kept alongside it for reference.
fine_tuned_model_name = "cheberle/autotrain-35swc-b4r9z"
base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

# The tokenizer is taken from the fine-tuned repository, not the base model.
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_name)

# Load the causal LM weights from the same fine-tuned repository.
model = AutoModelForCausalLM.from_pretrained(
    fine_tuned_model_name,
    torch_dtype="auto",  # let the library pick the precision for the checkpoint
    device_map="auto",  # let the library place the weights on available devices
)
# Define the chat function
def chat(input_text):
    """Generate a reply to ``input_text`` with the fine-tuned model.

    Args:
        input_text: Prompt text entered by the user.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    # Move the encoded prompt to the device the model was placed on. The
    # model is loaded with device_map="auto", so a hard-coded "cuda" target
    # fails on hosts without a GPU.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Forward the attention mask together with the input ids, and bound the
    # number of *new* tokens: max_length also counts the prompt, so a long
    # prompt would leave little or no room for a reply.
    output = model.generate(**encoded, max_new_tokens=100)

    return tokenizer.decode(output[0], skip_special_tokens=True)
# Build the Gradio UI: a two-line text box as input, plain text as output,
# with chat() as the handler.
_prompt_box = gr.Textbox(lines=2, placeholder="Type your input here...")
interface = gr.Interface(
    title="Chat with DeepSeek-AutoTrain Model",
    description="Fine-tuned version of DeepSeek-R1-Distill-Qwen-7B. Ask me anything!",
    fn=chat,
    inputs=_prompt_box,
    outputs="text",
)

# Only launch the interface when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()