# NOTE: Removed Hugging Face Spaces file-viewer residue that was pasted into
# this file ("Spaces:", "Runtime error" banner, file size, commit hashes, and
# gutter line numbers). It was not Python and broke the script at import time.
import gradio as gr
from transformers import TextStreamer
from unsloth import FastLanguageModel
# Define constants
max_seq_length = 2048  # Maximum context length (tokens) the model is loaded with
dtype = None  # None lets unsloth auto-select the dtype (bf16/fp16 depending on GPU)
model_name_or_path = "michailroussos/model_llama_8d"  # Hub repo id of the fine-tuned model
# Load the model and tokenizer (4-bit quantized to reduce GPU memory).
# NOTE(review): this runs at import time and requires a CUDA GPU — confirm
# the deployment target provides one.
model, tokenizer = FastLanguageModel.from_pretrained(
model_name=model_name_or_path,
max_seq_length=max_seq_length,
dtype=dtype,
load_in_4bit=True,
)
# Optimize model for inference (enables unsloth's fast generation path)
FastLanguageModel.for_inference(model)
# Function to generate a response
def chat_with_model(user_message, chat_history=None):
    """Generate one assistant reply for ``user_message``.

    Designed as the ``fn`` of ``gr.ChatInterface``: Gradio passes the message
    and the running history and expects the bot's reply as a plain string
    (ChatInterface manages the history itself).

    Args:
        user_message: The user's latest message.
        chat_history: Conversation history supplied by Gradio; unused here
            because the template is built from the current turn only.

    Returns:
        The model's decoded reply, or an ``"Error: ..."`` string on failure.
    """
    try:
        # Prepare the input messages for the chat template
        messages = [{"role": "user", "content": user_message}]
        # return_dict=True is required: with tokenize=True and
        # return_tensors="pt" but no return_dict, apply_chat_template returns
        # a bare tensor, and inputs["input_ids"] below raises TypeError
        # (the original runtime error).
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to("cuda")
        # Generate response
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],  # Ensure attention_mask is included
            streamer=None,  # Collect output as tensor
            max_new_tokens=128,
            use_cache=True,
            temperature=1.5,
            min_p=0.1,
        )
        # Decode only the newly generated tokens; decoding output_ids[0] in
        # full would echo the prompt/template back into the reply.
        prompt_len = inputs["input_ids"].shape[1]
        response = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
        # ChatInterface expects the reply string, not a (textbox, history)
        # tuple — that pattern belongs to a Blocks-based chatbot.
        return response
    except Exception as e:
        return f"Error: {str(e)}"
# Wire the generation function into a ready-made Gradio chat UI.
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="Hugging Face Chat Model",
    description="Chat with a Hugging Face model using FastLanguageModel.",
    chatbot=gr.Chatbot(label="Chat with Hugging Face Model"),
)

# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()