File size: 2,875 Bytes
2c4c7b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------------------------------------------------------
# 1) Where the checkpoint lives on the Hugging Face Hub.
#    MODEL_REPO is the repository id; SUBFOLDER is the directory
#    inside that repository holding the config/tokenizer/model files.
# ---------------------------------------------------------
MODEL_REPO = "wuhp/myr1"
SUBFOLDER = "myr1"

# ---------------------------------------------------------
# 2) Fetch tokenizer and model from the Hub.
#    Both loaders need the same location arguments, so they are
#    collected once: the files sit inside SUBFOLDER, and
#    trust_remote_code=True lets the repo's custom config/modeling
#    files be used.
# ---------------------------------------------------------
_hub_location = dict(subfolder=SUBFOLDER, trust_remote_code=True)

tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, **_hub_location)

# device_map="auto" places model layers on GPU(s) automatically when
# available; torch_dtype may be changed ("auto", float32, bfloat16, ...)
# to whatever the hardware supports.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
    **_hub_location,
)

# Inference only: switch the model to evaluation mode.
model.eval()


def generate_text(prompt, max_length=128, temperature=0.7, top_p=0.9):
    """
    Generate text from the module-level model, given an input prompt.

    Args:
        prompt: Text that is tokenized and fed to the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        temperature: Sampling temperature. A value ``<= 0`` is not a valid
            sampling temperature, so it selects greedy decoding instead of
            being forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold; only forwarded when sampling.

    Returns:
        The first sequence returned by ``model.generate``, decoded to a
        string with special tokens skipped.
    """
    # Convert to token IDs and move to model device (GPU/CPU)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        "max_length": max_length,
        # or set to a real pad_token_id if your model uses one
        "pad_token_id": tokenizer.eos_token_id,
    }

    # The UI exposes temperature 0.0, which sampling rejects. Treat any
    # non-positive temperature as a request for deterministic (greedy)
    # output, and only send sampling-specific arguments when sampling.
    if temperature is None or temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    else:
        generation_kwargs["do_sample"] = False

    # Generate output without tracking gradients
    with torch.no_grad():
        output_ids = model.generate(**inputs, **generation_kwargs)

    # Decode the tokens back into a string
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


# ---------------------------------------------------------
# 3) Gradio front end
#    One prompt box and three sliders; their values are handed to
#    generate_text in the order they appear under `inputs`.
# ---------------------------------------------------------
demo = gr.Interface(
    title="DeepSeek-R1 Gradio Demo",
    description=(
        "This Gradio interface loads the DeepSeek model from Hugging Face and lets you "
        "generate text by entering a prompt. Adjust parameters to see how output changes."
    ),
    fn=generate_text,
    inputs=[
        gr.Textbox(
            label="Enter your prompt",
            placeholder="Type something for the DeepSeek model...",
            lines=5,
        ),
        gr.Slider(minimum=64, maximum=1024, value=128, step=1, label="Max Length"),
        gr.Slider(minimum=0.0, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    outputs="text",
)

# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()