Spaces:

louiismiro
/

namas

Runtime error

File size: 1,174 Bytes

698a00c
d84e01b
 
698a00c
d84e01b
698a00c
 
d84e01b
698a00c
d84e01b
 
 
 
 
698a00c
d84e01b
 
 
 
 
 
 
698a00c
 
 
d84e01b
 
698a00c
d84e01b
 
698a00c
d84e01b
698a00c
 
 
d84e01b

from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch

# Hugging Face Hub identifier shared by the tokenizer and the model weights
MODEL_NAME = "SeaLLMs/SeaLLM-7B-v2.5"

# Options forwarded to the weight loader: half-precision tensors, with device
# placement delegated to the library through device_map="auto".
_MODEL_LOAD_OPTIONS = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}

# Load the tokenizer first, then the causal language model, both by MODEL_NAME
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **_MODEL_LOAD_OPTIONS)

# Chatbot function
def chatbot(prompt):
    """Generate a text continuation for ``prompt`` using the module-level model.

    Args:
        prompt: Text entered by the user.

    Returns:
        The decoded text of the first sequence returned by ``model.generate``
        with special tokens stripped, or an empty string when ``prompt`` is
        ``None``, empty, or whitespace-only.
    """
    # Nothing to answer: skip tokenization and the costly generation call.
    if not prompt or not prompt.strip():
        return ""

    # Move the encoded prompt to the device the model reports, rather than
    # assuming a CUDA device exists (device_map="auto" may choose otherwise).
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Forward the attention mask together with the token ids, and enable
    # sampling so that the temperature setting actually takes effect.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
    )

    # Decode the first (and only) sequence of the batch.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Gradio UI: a three-line prompt box wired through chatbot() to a response box
_prompt_box = gr.Textbox(
    lines=3,
    label="Ask me anything:",
    placeholder="Type your message here...",
)
_response_box = gr.Textbox(label="Response")

iface = gr.Interface(
    chatbot,
    _prompt_box,
    _response_box,
    title="SeaLLM Chatbot",
    description="A chatbot powered by SeaLLM-7B-v2.5 for text generation.",
)

# Start the web app only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()