import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the 8-bit OpenVINO export of Qwen2.5-1.5B-Instruct on CPU, then wrap
# the model and tokenizer in a standard transformers text-generation pipeline.
model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")
tokenizer = AutoTokenizer.from_pretrained(model_id)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
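
# Optional sanity check: the pipeline can be exercised directly before wiring
# up the UI (uncomment to try; max_new_tokens caps only the generated text).
# print(pipe("Hello!", max_new_tokens=32)[0]["generated_text"])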

def respond(message, history):
    try:
        # gr.ChatInterface passes `history` as a list of (user, bot) pairs;
        # fold it into a plain-text prompt for the pipeline.
        input_text = message
        if history:
            input_text = (
                "\n".join(f"User: {user}\nBot: {bot}" for user, bot in history)
                + f"\nUser: {message}"
            )

        response = pipe(
            input_text,
            max_new_tokens=256,  # cap the reply; max_length would count the prompt too
            truncation=True,
            num_return_sequences=1,
            do_sample=True,  # required for temperature/top_p to take effect
            temperature=0.7,
            top_p=0.9,
        )
        # The pipeline echoes the prompt before the completion; keep only the
        # newly generated text and return it as a string. ChatInterface
        # appends the reply to the history itself.
        reply = response[0]["generated_text"][len(input_text):].strip()
        return reply

    except Exception as e:
        print(f"Error: {e}")
        return "Sorry, something went wrong. Please try again."
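
# A closer match to how the model was trained: Qwen2.5-Instruct ships a chat
# template, so the prompt could instead be built with
# tokenizer.apply_chat_template (a sketch, assuming the bundled template;
# the plain "User:/Bot:" prompt above also works):
#
#   messages = [{"role": "user", "content": message}]
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )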

demo = gr.ChatInterface(
    fn=respond,
    title="Qwen2.5-1.5B-Instruct-openvino",
    description="Chat with the Qwen2.5-1.5B-Instruct-openvino model.",
    examples=["Hello!", "Tell me a joke.", "Explain quantum computing."],
    # The button arguments below are Gradio 4.x options (removed in Gradio 5).
    retry_btn=None,
    undo_btn=None,
    clear_btn="Clear History",
)

if __name__ == "__main__":
    demo.launch()
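
# Tip (assumption: running locally rather than on a hosted Space):
# demo.launch(share=True) creates a temporary public URL for quick sharing.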