import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline
# Load the model and tokenizer
model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the target device
tokenizer = AutoTokenizer.from_pretrained(model_id)
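# Note: the checkpoint above is an 8-bit OpenVINO export of Qwen2.5-1.5B-Instruct,
# which is why it is loaded through optimum-intel's OVModelForCausalLM wrapper.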
# Create generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
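# Illustrative smoke test (assumed prompt text, not part of the app flow):
# print(pipe("User: Hello!\nBot:", max_new_tokens=32, return_full_text=False)[0]["generated_text"])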

def respond(message, history):
    try:
        # Combine the entire conversation history
        input_text = message
        if history:
            input_text = "\n".join([f"User: {h[0]}\nBot: {h[1]}" for h in history]) + f"\nUser: {message}"
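        # The assembled prompt then has the shape (illustrative):
        #   User: <turn 1>
        #   Bot: <reply 1>
        #   User: <new message>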
        # Generate response
        response = pipe(
            input_text,
            max_length=512,
            truncation=True,
            num_return_sequences=1,
            do_sample=True,           # sampling must be enabled for temperature/top_p to take effect
            temperature=0.7,          # controls generation diversity
            top_p=0.9,                # controls generation quality
            return_full_text=False,   # return only the new text, not the echoed prompt
        )
        reply = response[0]['generated_text'].strip()

        # Update history
        history.append((message, reply))
        return history
    except Exception as e:
        print(f"Error: {e}")
        return history + [(message, "Sorry, something went wrong. Please try again.")]

# Custom clear function
def clear_history():
    return []

# Set up Gradio chat interface
with gr.Blocks() as demo:
    gr.Markdown("# Qwen2.5-1.5B-Instruct-openvino Chat")
    gr.Markdown("Chat with the Qwen2.5-1.5B-Instruct-openvino model.")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    clear_btn = gr.Button("Clear History")
    msg.submit(respond, [msg, chatbot], chatbot)  # send the message on Enter
    clear_btn.click(clear_history, None, chatbot, queue=False)  # reset the chat immediately, bypassing the queue

if __name__ == "__main__":
    demo.launch()
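    # Optional (assumed usage): demo.launch(share=True) would additionally create
    # a temporary public link when running outside a hosted environment.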