joaogante (HF Staff) committed
Commit 19ed9d4 · verified · 1 Parent(s): 4e37d0f

Update app.py

Files changed (1):
  1. app.py +8 -3
app.py CHANGED
@@ -1,9 +1,12 @@
+import spaces
+import gradio as gr
+
+import time
 from threading import Thread
 
 import torch
-import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-import time
+
 
 model_id = "facebook/opt-6.7b"
 assistant_id = "facebook/opt-125m"
@@ -12,6 +15,8 @@ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16
 assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(device=model.device, dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
+
+@spaces.GPU
 def run_generation(user_text, use_assistant, temperature, max_new_tokens):
     if temperature < 0.1:
         do_sample = False
@@ -45,7 +50,7 @@ def run_generation(user_text, use_assistant, temperature, max_new_tokens):
     for new_text in streamer:
         model_output += new_text
         yield [model_output, round(time.time() - start, 3)]
-    return [model_output, round(time.time() - start, 3)]
+    # return [model_output, round(time.time() - start, 3)]
 
 
 def reset_textbox():
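
For context: `@spaces.GPU` is the Hugging Face ZeroGPU decorator. On a ZeroGPU Space it attaches a GPU only while the decorated function runs, and `spaces` must be imported before anything touches CUDA, which is why this diff moves it to the very top of the imports. Below is a minimal sketch of how the pieces this diff touches typically fit together; note that the `from_pretrained` kwargs for `model` are truncated in the second hunk header, and `stream_generate` with its default arguments is an illustrative name, not the app's actual code:

import time
from threading import Thread

import spaces  # must be imported before any CUDA initialization on ZeroGPU
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

model_id = "facebook/opt-6.7b"
assistant_id = "facebook/opt-125m"
# The hunk header truncates the full from_pretrained(...) call for `model`;
# any kwargs beyond torch_dtype are omitted here.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
assistant_model = AutoModelForCausalLM.from_pretrained(assistant_id).to(device=model.device, dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_id)


@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def stream_generate(user_text, use_assistant=True, max_new_tokens=64):
    inputs = tokenizer(user_text, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        # Assisted generation: the small opt-125m model drafts tokens and the
        # 6.7B model verifies them in parallel, usually reducing latency.
        assistant_model=assistant_model if use_assistant else None,
    )
    # generate() blocks until finished, so it runs in a background thread
    # while the streamer yields decoded text as tokens arrive.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    model_output, start = "", time.time()
    for new_text in streamer:
        model_output += new_text
        yield [model_output, round(time.time() - start, 3)]

On the commented-out `return`: because `run_generation` contains `yield`, it is a generator, so a trailing `return` value only surfaces as `StopIteration.value`, which Gradio's streaming consumer never reads; dropping it changes nothing observable.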