# Spaces: cheberle/deepseek (Paused)
# File size: 1,182 Bytes

from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Hub repository ids. Only the fine-tuned id is passed to the loaders below;
# the base id is kept as a module-level name.
base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
fine_tuned_model_name = "cheberle/autotrain-35swc-b4r9z"

# Tokenizer and weights are both pulled from the fine-tuned repository
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model_name)

# Loader options gathered in one place before the model is instantiated
_model_load_options = {
    "device_map": "auto",   # Auto-distributes model across devices
    "torch_dtype": "auto",  # Matches model precision
}
model = AutoModelForCausalLM.from_pretrained(
    fine_tuned_model_name,
    **_model_load_options,
)

# Define the chat function
def chat(input_text):
    """Generate a model reply for a single prompt.

    Args:
        input_text: Prompt text entered in the Gradio textbox.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens stripped, or an empty string when the prompt is
        empty, ``None``, or only whitespace.
    """
    # Nothing to generate from; skip the model call entirely.
    if not input_text or not input_text.strip():
        return ""

    # Follow the model's own device instead of assuming CUDA: the model is
    # loaded with device_map="auto", so the weights may sit on CPU when no
    # GPU is available.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Forward everything the tokenizer produced (including the attention
    # mask) rather than input_ids alone. max_new_tokens limits only the
    # generated part; max_length also counted the prompt tokens, which left
    # long prompts with little or no room for a reply.
    output = model.generate(**encoded, max_new_tokens=100)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Build the Gradio UI: a two-line textbox feeding chat(), plain text out
_prompt_box = gr.Textbox(lines=2, placeholder="Type your input here...")
interface = gr.Interface(
    title="Chat with DeepSeek-AutoTrain Model",
    description="Fine-tuned version of DeepSeek-R1-Distill-Qwen-7B. Ask me anything!",
    fn=chat,
    inputs=_prompt_box,
    outputs="text",
)

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    interface.launch()