Di Zhang
committed on
Update app.py
app.py CHANGED

@@ -5,6 +5,9 @@ import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import hf_hub_download, snapshot_download
+import accelerate
+
+accelerator = accelerate.Accelerator()
 
 # Load the model and tokenizer from Hugging Face
 model_path = snapshot_download(
@@ -38,7 +41,7 @@ def llama_o1_template(data):
 @spaces.GPU
 def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
     input_text = llama_o1_template(message)
-    inputs = tokenizer(input_text, return_tensors="pt")
+    inputs = tokenizer(input_text, return_tensors="pt").to(accelerator.device)
 
     # Generate the text with the model
     output = model.generate(
@@ -47,7 +50,6 @@ def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
         temperature=temperature,
         top_p=top_p,
         do_sample=True,
-        pad_token_id=tokenizer.eos_token_id,
     )
 
     response = tokenizer.decode(output[0], skip_special_tokens=True)
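
In short, this commit creates an accelerate.Accelerator() at startup and moves the tokenized inputs onto accelerator.device, so generation runs on whatever device the Space is allocated (a GPU under @spaces.GPU, otherwise CPU); it also drops the explicit pad_token_id=tokenizer.eos_token_id argument from model.generate. Below is a minimal, self-contained sketch of the same device-placement pattern; the tiny model name is an assumption for illustration only, not the model this Space actually loads.

import accelerate
from transformers import AutoTokenizer, AutoModelForCausalLM

# Accelerator resolves the best available device (CUDA GPU if present, else CPU).
accelerator = accelerate.Accelerator()

# Hypothetical tiny model, chosen only so the sketch runs quickly; not the Space's model.
tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
model.to(accelerator.device)  # model weights must live on the same device as the inputs

# tokenizer(...) returns a BatchEncoding; .to(...) moves all of its tensors at once.
inputs = tokenizer("Hello, world", return_tensors="pt").to(accelerator.device)

output = model.generate(**inputs, max_new_tokens=16, do_sample=True, temperature=0.9, top_p=0.95)
print(tokenizer.decode(output[0], skip_special_tokens=True))

A note on the removed argument: when pad_token_id is left unset, recent transformers versions typically fall back to eos_token_id during generation and log a warning, so single-sequence generation should behave the same; for batched inputs an explicit pad token is still the safer choice.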