cody82 committed
Commit 9ba4eae · verified · 1 Parent(s): 5a42269

Update app.py

Files changed (1)
app.py  +20 -9
app.py CHANGED
@@ -1,20 +1,31 @@
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import spaces
 
 model_id = "google/flan-t5-base"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
-model.to("cuda" if torch.cuda.is_available() else "cpu")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
 
-@spaces.GPU
 def respond(message, history=[]):
-    prompt = f"Answer the question: {message}"
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    output = model.generate(**inputs, max_new_tokens=100)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    history.append((message, response))
+    prompt = f"Question: {message} Answer:"
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=50,
+            do_sample=False,
+            eos_token_id=tokenizer.eos_token_id
+        )
+    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Strip the prompt prefix from the answer, if any remains
+    if answer.lower().startswith(prompt.lower()):
+        answer = answer[len(prompt):].strip()
+
+    history.append((message, answer))
     return history
 
-gr.ChatInterface(fn=respond).launch()
+iface = gr.ChatInterface(fn=respond, title="Innopolis Q&A")
+iface.launch()
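
For reviewers who want to sanity-check the new respond() without launching the Gradio UI, here is a minimal sketch (hypothetical, not part of this commit; it assumes it is appended to app.py in place of the iface.launch() call, and the question string is only an example). Note that flan-t5-base is an encoder-decoder model, so tokenizer.decode on the generated ids normally yields only the answer text; the prompt-stripping branch above is a defensive fallback.

# Hypothetical smoke test, not part of this commit: paste at the bottom of
# app.py (replacing iface.launch()) to exercise respond() from the console.
if __name__ == "__main__":
    history = respond("What city hosts Innopolis University?")  # example question
    message, answer = history[-1]  # respond() appends a (message, answer) pair
    print(f"Q: {message}")
    print(f"A: {answer}")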