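"""Minimal Streamlit chatbot built on t5-small.

Run locally (assuming this file is saved as app.py):
    streamlit run app.py
"""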
import streamlit as st
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Load the pre-trained T5 model and tokenizer once and cache them, so Streamlit
# does not reload them on every rerun (Streamlit re-executes the whole script
# on each user interaction). st.cache_resource needs a recent Streamlit release.
model_name = "t5-small"  # Use t5-small for faster responses

@st.cache_resource
def load_model():
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = load_model()
# Set device to GPU if available for faster inference, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Streamlit interface
st.title("Simple Chatbot with T5")
def generate_response(input_text):
    # Add conversational context to the input
    input_text = f"You are a helpful assistant. {input_text}"
    # Tokenize the input text
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    # Generate a response; do_sample=True is required for top_p and temperature
    # to have any effect (plain beam search ignores the sampling settings)
    outputs = model.generate(
        input_ids,
        max_length=100,          # Max length of the output sequence
        num_beams=5,             # Beam search for better results
        do_sample=True,          # Enable sampling so top_p/temperature are used
        top_p=0.95,              # Top-p (nucleus) sampling for more variety
        temperature=0.7,         # Temperature controls randomness
        no_repeat_ngram_size=2,  # Prevent repetition of n-grams
        pad_token_id=tokenizer.pad_token_id,  # T5 defines its own pad token
    )
    # Decode the model's output to a readable string
    bot_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return bot_output
# Create an input box for the user to type a message
user_input = st.text_input("You: ", "")

if user_input:
    # Generate and display the bot's response
    response = generate_response(user_input)
    st.write(f"Bot: {response}")