import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer
model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the target device
tokenizer = AutoTokenizer.from_pretrained(model_id)
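# Note: the checkpoint above is an 8-bit OpenVINO export of Qwen2.5-1.5B-Instruct,
# which is why it is loaded through optimum-intel's OVModelForCausalLM wrapper.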
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
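# Illustrative smoke test (assumed prompt text, not part of the app flow):
# print(pipe("User: Hello!\nBot:", max_new_tokens=32, return_full_text=False)[0]["generated_text"])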

def respond(message, history):
    try:
        # Combine the entire conversation history
        input_text = message
        if history:
            input_text = "\n".join([f"User: {h[0]}\nBot: {h[1]}" for h in history]) + f"\nUser: {message}"
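        # The assembled prompt then has the shape (illustrative):
        #   User: <turn 1>
        #   Bot: <reply 1>
        #   User: <new message>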
        # Generate response
        response = pipe(
            input_text,
            max_length=512,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,           # sampling must be enabled for temperature/top_p to take effect
            temperature=0.7,          # controls generation diversity
            top_p=0.9,                # controls generation quality
            return_full_text=False,   # return only the new text, not the echoed prompt
        )
        reply = response[0]['generated_text'].strip()

        # Update history
        history.append((message, reply))
        return history
    except Exception as e:
        print(f"Error: {e}")
        return history + [(message, "Sorry, something went wrong. Please try again.")]

# Custom clear function
def clear_history():
    return []

# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# Qwen2.5-1.5B-Instruct-openvino Chat")
    gr.Markdown("Chat with the Qwen2.5-1.5B-Instruct-openvino model.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    clear_btn = gr.Button("Clear History")
    msg.submit(respond, [msg, chatbot], chatbot)  # send the message on Enter
    clear_btn.click(clear_history, None, chatbot, queue=False)  # reset the chat immediately, bypassing the queue

if __name__ == "__main__":
    demo.launch()
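    # Optional (assumed usage): demo.launch(share=True) would additionally create
    # a temporary public link when running outside a hosted environment.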