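"""Gradio chat demo for Qwen2.5-1.5B-Instruct, quantized to 8-bit and run
on CPU with OpenVINO via optimum-intel."""
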
import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer
model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly target the CPU device
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
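
# Note: the transformers pipeline accepts the OpenVINO model directly, since
# OVModelForCausalLM exposes the same generate() interface as a standard
# transformers model.
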
def respond(message, history):
    try:
        # Flatten the ChatInterface history (a list of [user, bot] pairs)
        # into a single prompt string.
        input_text = message
        if history:
            input_text = "\n".join(f"User: {user}\nBot: {bot}" for user, bot in history) + f"\nUser: {message}"
        # Generate a response. Sampling must be enabled for temperature/top_p
        # to take effect, and return_full_text=False strips the prompt from
        # the output.
        response = pipe(
            input_text,
            max_length=512,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,  # controls generation diversity
            top_p=0.9,  # nucleus sampling; controls generation quality
            return_full_text=False,
        )
        # ChatInterface manages the history itself, so only the new reply
        # string is returned.
        return response[0]['generated_text'].strip()
    except Exception as e:
        print(f"Error: {e}")
        return "Sorry, something went wrong. Please try again."

# Set up the Gradio chat interface
demo = gr.ChatInterface(
    fn=respond,
    title="Qwen2.5-1.5B-Instruct-openvino",
    description="Chat with the Qwen2.5-1.5B-Instruct-openvino model.",
    examples=["Hello!", "Tell me a joke.", "Explain quantum computing."],
    retry_btn=None,
    undo_btn=None,
    clear_btn="Clear History",
)

if __name__ == "__main__":
    demo.launch()
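    # Optionally, demo.queue() can be called before launch() to serialize
    # concurrent requests, which may help since generation here is CPU-bound
    # (assumes Gradio 4.x, where ChatInterface still accepts the *_btn kwargs).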