joaogante HF staff committed on
Commit
8629969
·
verified ·
1 Parent(s): b8c8863

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -8,7 +8,7 @@ import torch
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
9
 
10
 
11
- model_id = "Qwen/Qwen2.5-32B-Instruct"
12
  assistant_id = "Qwen/Qwen2.5-0.5B-Instruct"
13
 
14
  model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
@@ -61,8 +61,8 @@ def reset_textbox():
61
  with gr.Blocks() as demo:
62
  gr.Markdown(
63
  "# 🤗 Assisted Generation Demo\n"
64
- f"- Model: {model_id} (4-bit quant, ~16GB)\n"
65
- f"- Assistant Model: {assistant_id} (FP16, ~1GB)\n"
66
  "- Recipe for speedup: a) >10x model size difference in parameters; b) assistant trained similarly; c) CPU is not a bottleneck"
67
  )
68
 
 
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
9
 
10
 
11
+ model_id = "Qwen/Qwen2.5-14B-Instruct"
12
  assistant_id = "Qwen/Qwen2.5-0.5B-Instruct"
13
 
14
  model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
 
61
  with gr.Blocks() as demo:
62
  gr.Markdown(
63
  "# 🤗 Assisted Generation Demo\n"
64
+ f"- Model: {model_id} (4-bit quant, 14B params, GPU memory = ~7GB)\n"
65
+ f"- Assistant Model: {assistant_id} (FP16, 0.5B params, GPU memory = ~1GB)\n"
66
  "- Recipe for speedup: a) >10x model size difference in parameters; b) assistant trained similarly; c) CPU is not a bottleneck"
67
  )
68