import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ---------------------------------------------------------
# 1) Point at your Hugging Face repo and subfolder:
#    "wuhp/myr1" is the repository;
#    "myr1" is the subfolder where the config/tokenizer/model files live.
# ---------------------------------------------------------
MODEL_REPO = "wuhp/myr1"
SUBFOLDER = "myr1"
# ---------------------------------------------------------
# 2) Load the tokenizer and model from the Hub
# - trust_remote_code=True allows custom config & modeling files.
# ---------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,   # important because the model files sit inside 'myr1'
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,        # also needed here
    trust_remote_code=True,
    device_map="auto",          # automatically place model layers on GPU(s) if available
    torch_dtype=torch.float16,  # or "auto", "float32", "bfloat16", etc., as your hardware supports
    low_cpu_mem_usage=True,
)
# Put the model in evaluation mode
model.eval()
def generate_text(prompt, max_length=128, temperature=0.7, top_p=0.9):
    """
    Generate text from the DeepSeek-R1 model, given an input prompt.
    """
    # Convert the prompt to token IDs and move them to the model's device (GPU/CPU)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate output
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=max_length,   # total length, prompt tokens included
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # or set a real pad_token_id if your model defines one
        )

    # Decode the tokens back into a string
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
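
# Optional tweak (not part of the original demo): max_length counts the prompt
# tokens too, so a long prompt leaves little room for new text. If you prefer to
# bound only the newly generated tokens, model.generate also accepts
# max_new_tokens, e.g.:
#
#     output_ids = model.generate(
#         **inputs,
#         max_new_tokens=max_length,   # treat the slider value as a "new tokens" budget
#         temperature=temperature,
#         top_p=top_p,
#         do_sample=True,
#         pad_token_id=tokenizer.eos_token_id,
#     )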
# ---------------------------------------------------------
# 3) Build Gradio UI
# ---------------------------------------------------------
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=5,
            label="Enter your prompt",
            placeholder="Type something for the DeepSeek model...",
        ),
        gr.Slider(64, 1024, step=1, value=128, label="Max Length"),
        gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(0.0, 1.0, step=0.05, value=0.9, label="Top-p"),
    ],
    outputs="text",
    title="DeepSeek-R1 Gradio Demo",
    description=(
        "This Gradio interface loads the DeepSeek model from Hugging Face and lets you "
        "generate text by entering a prompt. Adjust the parameters to see how the output changes."
    ),
)
if __name__ == "__main__":
    demo.launch()
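    # If you are running this on a remote box without direct access to localhost,
    # launch() also accepts standard options such as server_name="0.0.0.0" and
    # share=True, shown here only as a hint:
    #
    #     demo.launch(server_name="0.0.0.0", share=True)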