# Spaces: Runtime error
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub identifier of the checkpoint this app serves
MODEL_NAME = "SeaLLMs/SeaLLM-7B-v2.5"

# Fetch the tokenizer and the causal-LM weights registered under MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",  # let the loader pick device placement automatically
    torch_dtype=torch.float16,  # load the weights in half precision
)
# Chatbot function
def chatbot(prompt):
    """Generate text from the loaded model for a user prompt.

    Args:
        prompt: Text entered by the user.

    Returns:
        The decoded output sequence with special tokens removed. The
        sequence returned by ``generate`` is decoded in full, so the text
        starts with the prompt itself. A missing, empty, or whitespace-only
        prompt yields an empty string without invoking the model.
    """
    # Nothing to answer: skip tokenization and generation entirely.
    if not prompt or not prompt.strip():
        return ""

    # Put the inputs on the device the model actually lives on instead of
    # hard-coding "cuda", which fails on hosts without a GPU.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        inputs.input_ids,
        # Forward the mask so padding/pad-token handling is unambiguous.
        attention_mask=inputs.attention_mask,
        max_new_tokens=150,
        # Temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
    )

    # Decode the first (only) sequence of the batch.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Gradio UI: one multi-line text input wired to `chatbot`, one text output.
iface = gr.Interface(
    title="SeaLLM Chatbot",
    description="A chatbot powered by SeaLLM-7B-v2.5 for text generation.",
    fn=chatbot,
    inputs=gr.Textbox(
        placeholder="Type your message here...",
        lines=3,
        label="Ask me anything:",
    ),
    outputs=gr.Textbox(label="Response"),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()