# Hugging Face Space — status: Running
import torch | |
import gradio as gr | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Checkpoint to serve; flan-t5-large is also worth trying.
model_id = "google/flan-t5-base"

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and the seq2seq model, placing the model on the
# selected device right away.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id).to(device)
def respond(message, history=None):
    """Answer one user question about Innopolis University.

    This is the callback handed to ``gr.ChatInterface``. The chat interface
    keeps track of the conversation itself and expects the callback to
    return only the new reply, so this function returns the answer text and
    does not modify ``history``.

    Args:
        message: The user's latest message (the question text).
        history: Previous turns as supplied by the chat interface. Accepted
            for signature compatibility; not used, because every question is
            answered independently of earlier turns.

    Returns:
        The model's answer as a plain string.
    """
    prompt = f"Answer the following question about Innopolis University clearly and concisely.\nQuestion: {message}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,  # no sampling
            eos_token_id=tokenizer.eos_token_id,
        )

    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strip the prompt if the model echoed it at the start of its output.
    if answer.lower().startswith(prompt.lower()):
        answer = answer[len(prompt):].strip()

    return answer
# Build the chat UI around `respond` and start serving it.
iface = gr.ChatInterface(
    fn=respond,
    title="Innopolis Q&A",
)
iface.launch()