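"""app.py: minimal Gradio chat UI for a locally cached causal LM.

Loads the "cody82/unitrip" checkpoint from the local Hugging Face cache
and serves a simple assistant for Innopolis University.
"""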
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
model_path = "cody82/unitrip"  # path to the local model
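# Load config and tokenizer strictly from local files (no network access).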
config = AutoConfig.from_pretrained(model_path, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
# Load the model; shard it across available GPUs when present, else run on CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    local_files_only=True,
    torch_dtype=torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,
)
# System prompt (Russian): "You are a smart assistant for Innopolis University."
system_message = "Ты — умный помощник по Университету Иннополис."
def respond(message, history=None):
    # gr.ChatInterface passes (message, history) and expects the reply string;
    # it tracks the conversation itself, so `history` is not mutated here.
    prompt = f"{system_message}\nUser: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            do_sample=False,  # greedy decoding for deterministic answers
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )
    # Drop the prompt tokens so only newly generated text is decoded.
    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    answer = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
    return answer
# Build and launch the web UI.
chat = gr.ChatInterface(fn=respond, title="Innopolis Assistant")
chat.launch()
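# Running locally (a sketch of the assumed environment, not pinned by this Space):
#   pip install torch transformers gradio accelerate
#   python app.py
# `accelerate` is only needed because device_map="auto" is used on GPU machines.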