Spaces:

joaogante
/

assisted_generation_demo

Running on Zero

joaogante HF Staff committed on Mar 5

Commit

1702ae3

verified ·

1 Parent(s): 5c3f286

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from threading import Thread
 import torch
 import gradio as gr
@@ -12,6 +13,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16
 assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(device=model.device, dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 def run_generation(user_text, use_assistant, temperature, max_new_tokens):
     if temperature < 0.1:
         do_sample = False

 from threading import Thread
+import spaces
 import torch
 import gradio as gr
 assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(device=model.device, dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+@spaces.GPU
 def run_generation(user_text, use_assistant, temperature, max_new_tokens):
     if temperature < 0.1:
         do_sample = False