Spaces:

michailroussos
/

ID2223_9D_withGPU

Runtime error

michailroussos

3bc8976 6 months ago

2.45 kB

	import gradio as gr
	from transformers import TextStreamer
	from unsloth import FastLanguageModel
	import torch

	# Model Configuration
	# Checkpoint to load, plus the sequence-length / dtype settings handed to the loader.
	model_name_or_path = "michailroussos/model_llama_8d"
	max_seq_length = 2048
	dtype = None  # NOTE(review): presumably lets the loader pick the dtype itself; confirm in the Unsloth docs

	# Load Model and Tokenizer
	# Loader arguments are gathered in one mapping so they can be read at a glance.
	_load_options = {
	    "model_name": model_name_or_path,
	    "max_seq_length": max_seq_length,
	    "dtype": dtype,
	    "load_in_4bit": True,
	}

	print("Loading model...")
	model, tokenizer = FastLanguageModel.from_pretrained(**_load_options)
	# Switch the freshly loaded model into Unsloth's inference mode before serving requests.
	FastLanguageModel.for_inference(model)
	print("Model loaded successfully!")

	# Gradio Response Function
	def respond(message, max_new_tokens, temperature, system_message=""):
	    """Generate a single-turn reply to ``message`` and return it as text.

	    Args:
	        message: The user's prompt.
	        max_new_tokens: Upper bound on generated tokens. Coerced to ``int``
	            because UI sliders may deliver floats.
	        temperature: Sampling temperature forwarded to ``model.generate``.
	        system_message: Optional system prompt; omitted from the
	            conversation when empty.

	    Returns:
	        The decoded model reply (prompt tokens and special tokens removed),
	        or a string of the form ``"Error: <details>"`` if anything fails.
	        Exceptions are not propagated, so the UI always receives text.
	    """
	    try:
	        # Prepare input messages
	        messages = []
	        if system_message:
	            messages.append({"role": "system", "content": system_message})
	        messages.append({"role": "user", "content": message})

	        # Debug: Show messages
	        print("[DEBUG] Messages:", messages)

	        # Tokenize inputs. return_dict=True is required so that the result
	        # exposes "input_ids" and "attention_mask"; without it a bare tensor
	        # of token ids comes back and the key lookups below fail.
	        inputs = tokenizer.apply_chat_template(
	            messages,
	            tokenize=True,
	            add_generation_prompt=True,
	            return_tensors="pt",
	            return_dict=True,
	        ).to(model.device)  # follow the model's device instead of assuming "cuda"

	        input_ids = inputs["input_ids"]
	        attention_mask = inputs["attention_mask"]

	        # Debug: Inspect input tensors
	        print("[DEBUG] Tokenized input IDs:", input_ids.shape)
	        print("[DEBUG] Attention mask:", attention_mask.shape)

	        # Stream tokens to stdout while generating (server-side log only).
	        text_streamer = TextStreamer(tokenizer, skip_prompt=True)
	        output_ids = model.generate(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            max_new_tokens=int(max_new_tokens),
	            temperature=float(temperature),
	            # Temperature only takes effect when sampling is enabled.
	            do_sample=True,
	            use_cache=True,
	            streamer=text_streamer,
	        )

	        # generate() returns prompt + completion token ids; keep only the
	        # newly generated part and decode it so the "text" output gets a str.
	        prompt_length = input_ids.shape[-1]
	        return tokenizer.decode(
	            output_ids[0, prompt_length:],
	            skip_special_tokens=True,
	        )
	    except Exception as e:
	        # UI boundary: log the failure and surface it as text rather than crashing.
	        print("[ERROR]", str(e))
	        return f"Error: {str(e)}"

	# Gradio UI
	# Input widgets, kept in the same order as respond()'s positional parameters.
	_message_box = gr.Textbox(
	    label="Your Message",
	    placeholder="Enter your prompt here...",
	)
	_token_slider = gr.Slider(
	    label="Max New Tokens",
	    minimum=1,
	    maximum=512,
	    step=1,
	    value=128,
	)
	_temperature_slider = gr.Slider(
	    label="Temperature",
	    minimum=0.1,
	    maximum=2.0,
	    step=0.1,
	    value=1.0,
	)
	_system_box = gr.Textbox(
	    label="System Message",
	    placeholder="Optional system instructions.",
	)

	# Single-function interface: the four inputs feed respond(), whose result is shown as text.
	demo = gr.Interface(
	    title="LLama-based Chatbot",
	    description="Interact with the model. Enter a prompt and receive a response.",
	    fn=respond,
	    inputs=[_message_box, _token_slider, _temperature_slider, _system_box],
	    outputs="text",
	)

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch(share=True)