import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# DeepSeek Coder 1.3B base model (the smallest in the DeepSeek Coder family).
# Note: there is no "deepseek-coder-7b-base" repo on the Hub, and the UI text
# below advertises the 1.3B model, so we load that one.
model_id = "deepseek-ai/deepseek-coder-1.3b-base"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # Auto-detect GPU if available (requires `accelerate`)
    torch_dtype=torch.float16,  # FP16 for faster, lower-memory inference
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate_code(prompt):
    response = pipe(
        prompt,
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        return_full_text=False,  # Return only the completion, not the echoed prompt
    )
    return response[0]["generated_text"]

gr.Interface(
    fn=generate_code,
    inputs=gr.Textbox(lines=4, placeholder="Ask DeepSeek Coder something..."),
    outputs="text",
    title="🧠 DeepSeek Coder (1.3B)",
    description="Running the open-source DeepSeek Coder 1.3B base model on Hugging Face Spaces.",
).launch()
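
# Deploying this script as a Gradio Space also needs a requirements.txt next to
# app.py. A minimal sketch, assuming unpinned latest versions are acceptable:
#
#   gradio
#   transformers
#   torch
#   accelerate  # needed for device_map="auto"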