"""Minimal Gradio chat demo backed by a Hugging Face causal language model.

The script loads the model once at import time, exposes ``respond`` as the
chat callback, builds a small ``gr.Blocks`` UI and launches it.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

model_id = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

system_message = "Ты — умный помощник по Университету Иннополис."

# Upper bound on the number of tokens generated per reply.
MAX_NEW_TOKENS = 150

# Marker that starts a user turn in the prompt; if the model emits it, the
# model has started writing the next user turn instead of its own answer.
_USER_TURN_MARKER = "\nUser:"


def _build_prompt(user_message, turns):
    """Return the text prompt: system message, past turns, then the new turn."""
    parts = [system_message + "\n"]
    parts.extend(
        f"User: {user_text}\nAssistant: {bot_text}\n"
        for user_text, bot_text in turns
    )
    parts.append(f"User: {user_message}\nAssistant:")
    return "".join(parts)


def _prompt_token_budget():
    """Return how many prompt tokens fit next to the reply, or None if unknown."""
    context_size = getattr(model.config, "max_position_embeddings", None)
    if not context_size:
        return None
    return max(1, context_size - MAX_NEW_TOKENS)


def _encode_prompt(user_message, history):
    """Tokenize the prompt, dropping the oldest turns until it fits the model."""
    budget = _prompt_token_budget()
    turns = list(history)  # work on a copy; the caller's history stays intact
    while True:
        encoded = tokenizer(
            _build_prompt(user_message, turns), return_tensors="pt"
        )
        too_long = (
            budget is not None and encoded["input_ids"].shape[1] > budget
        )
        if not too_long or not turns:
            break
        turns.pop(0)

    tensors = dict(encoded.items())
    if too_long:
        # Even without any history the prompt is too long: keep the tail,
        # because the trailing "Assistant:" cue is what the model completes.
        tensors = {name: t[:, -budget:] for name, t in tensors.items()}
    return {name: t.to(device) for name, t in tensors.items()}


def respond(user_message, history):
    """Generate a reply to ``user_message`` and append the turn to ``history``.

    Args:
        user_message: Text submitted by the user.
        history: List of ``(user_text, bot_text)`` pairs from earlier turns,
            or ``None`` when there is no history yet.

    Returns:
        A ``(history, history)`` pair: the same updated list twice, one for
        the chatbot component and one for the session state.
    """
    if history is None:
        history = []

    # Nothing to answer: do not spend a generation on an empty submission.
    if not user_message or not user_message.strip():
        return history, history

    inputs = _encode_prompt(user_message, history)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=False,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    generated_text = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    # Stop at the point where the model starts inventing the next user turn,
    # so that fabricated dialogue is not stored and fed back into the prompt.
    generated_text = generated_text.split(_USER_TURN_MARKER, 1)[0].strip()

    history.append((user_message, generated_text))
    return history, history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    message = gr.Textbox(placeholder="Введите вопрос...")
    state = gr.State([])  # Conversation history for the current session

    message.submit(respond, inputs=[message, state], outputs=[chatbot, state])
    message.submit(lambda: "", None, message)  # Clear the input box after submit

demo.launch(share=True)