Di Zhang committed
Commit 22dfef8 · verified · 1 Parent(s): 022534e

Update app.py

Files changed (1)
  1. app.py +4 -2
app.py CHANGED

@@ -5,6 +5,9 @@ import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import hf_hub_download, snapshot_download
+import accelerate
+
+accelerator = accelerate.Accelerator()
 
 # Load the model and tokenizer from Hugging Face
 model_path = snapshot_download(
@@ -38,7 +41,7 @@ def llama_o1_template(data):
 @spaces.GPU
 def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
     input_text = llama_o1_template(message)
-    inputs = tokenizer(input_text, return_tensors="pt")
+    inputs = tokenizer(input_text, return_tensors="pt").to(accelerator.device)
 
     # Generate the text with the model
     output = model.generate(
@@ -47,7 +50,6 @@ def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95)
         temperature=temperature,
         top_p=top_p,
         do_sample=True,
-        pad_token_id=tokenizer.eos_token_id,
     )
 
     response = tokenizer.decode(output[0], skip_special_tokens=True)
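For context, a minimal runnable sketch of the pattern this commit introduces: creating an accelerate.Accelerator and moving the tokenized inputs onto its device before calling model.generate, so the input tensors sit on the same device as the model. The checkpoint name below is a placeholder for illustration, not the Space's actual model; the sampling parameters mirror the defaults in generate_text.

import accelerate
from transformers import AutoModelForCausalLM, AutoTokenizer

# Accelerator resolves the best available device (CUDA, MPS, or CPU).
accelerator = accelerate.Accelerator()

model_name = "gpt2"  # placeholder checkpoint, for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(accelerator.device)

# As in the commit: move the tokenized batch to the accelerator's device.
inputs = tokenizer("Hello, world", return_tensors="pt").to(accelerator.device)
output = model.generate(
    **inputs,
    max_new_tokens=64,
    temperature=0.9,
    top_p=0.95,
    do_sample=True,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Note that the commit also drops pad_token_id=tokenizer.eos_token_id from the generate call; when pad_token_id is unset, transformers falls back to the model's eos token for open-ended generation and emits a warning instead.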