wuhp committed
Commit 5a9af80 · verified · 1 Parent(s): 3537f55

Update app.py

Files changed (1)
  app.py +22 -42

app.py CHANGED
@@ -2,80 +2,60 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-# ---------------------------------------------------------
-# 1) Points to your Hugging Face repo and subfolder:
-#    "wuhp/myr1" is the repository
-#    "myr1" is the subfolder where the config/tokenizer/model are located.
-# ---------------------------------------------------------
 MODEL_REPO = "wuhp/myr1"
 SUBFOLDER = "myr1"
 
-# ---------------------------------------------------------
-# 2) Load the tokenizer and model from the Hub
-#    - trust_remote_code=True allows custom config & modeling files.
-# ---------------------------------------------------------
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_REPO,
-    subfolder=SUBFOLDER,  # important because the model files sit inside 'myr1'
+    subfolder=SUBFOLDER,
     trust_remote_code=True
 )
 
+# If your GPU has <24GB VRAM, consider 8-bit or CPU offloading
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_REPO,
-    subfolder=SUBFOLDER,  # also needed here
+    subfolder=SUBFOLDER,
     trust_remote_code=True,
-    device_map="auto",          # automatically place model layers on GPU(s) if available
-    torch_dtype=torch.float16,  # or "auto", "float32", "bfloat16", etc. as your hardware supports
+    device_map="auto",          # tries to place layers on GPU, then CPU if needed
+    torch_dtype=torch.float16,  # or bfloat16 or float32
     low_cpu_mem_usage=True
 )
 
-# Put the model in evaluation mode
 model.eval()
 
-
-def generate_text(prompt, max_length=128, temperature=0.7, top_p=0.9):
-    """
-    Generate text from your DeepSeekR1 model, given an input prompt.
-    """
-    # Convert to token IDs and move to model device (GPU/CPU)
+def generate_text(prompt, max_length=64, temperature=0.7, top_p=0.9):
+    print("=== Starting generation ===")
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-    # Generate output
-    with torch.no_grad():
+    try:
         output_ids = model.generate(
             **inputs,
-            max_length=max_length,
+            max_new_tokens=max_length,  # alternative to max_length
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id,  # or set to a real pad_token_id if your model uses one
+            pad_token_id=tokenizer.eos_token_id
        )
-
-    # Decode the tokens back into a string
+        print("=== Generation complete ===")
+    except Exception as e:
+        print(f"Error during generation: {e}")
+        return str(e)
     return tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
-
-# ---------------------------------------------------------
-# 3) Build Gradio UI
-# ---------------------------------------------------------
 demo = gr.Interface(
     fn=generate_text,
     inputs=[
         gr.Textbox(
-            lines=5,
-            label="Enter your prompt",
-            placeholder="Type something for the DeepSeek model..."
+            lines=4,
+            label="Prompt",
+            placeholder="Try a short prompt, e.g., Hello!"
         ),
-        gr.Slider(64, 1024, step=1, value=128, label="Max Length"),
-        gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature"),
-        gr.Slider(0.0, 1.0, step=0.05, value=0.9, label="Top-p"),
+        gr.Slider(8, 512, value=64, step=1, label="Max New Tokens"),
+        gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
     ],
     outputs="text",
-    title="DeepSeek-R1 Gradio Demo",
-    description=(
-        "This Gradio interface loads the DeepSeek model from Hugging Face and lets you "
-        "generate text by entering a prompt. Adjust parameters to see how output changes."
-    )
+    title="DeepSeek R1 Demo",
+    description="Generates text using the large DeepSeek model."
 )
 
 if __name__ == "__main__":
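
Note: the new comment mentions 8-bit loading or CPU offloading for GPUs with less than 24 GB of VRAM, but the commit itself keeps float16. Below is a minimal sketch of the 8-bit variant, assuming bitsandbytes and accelerate are installed in the Space; the bnb_config and model_8bit names are illustrative and not part of this commit.

# Sketch only: 8-bit weight loading as an alternative to torch.float16.
# Assumes bitsandbytes and accelerate are installed; not part of this commit.
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

MODEL_REPO = "wuhp/myr1"
SUBFOLDER = "myr1"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)   # quantize linear-layer weights to int8

model_8bit = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    subfolder=SUBFOLDER,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",        # still needed so quantized layers are placed on the GPU
    low_cpu_mem_usage=True
)
model_8bit.eval()

Int8 weights take roughly half the memory of float16, which is the main reason to reach for this path on smaller GPUs; output quality is usually close, but it is worth spot-checking generations before switching the Space over.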