Spaces:

hsuwill000
/

Qwen2.5-1.5B-Instruct-openvino-8bit

Running

App Files Files Community

hsuwill000 committed on Jan 20

Commit

e69a5b4

verified ·

1 Parent(s): 68d71c5

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -13

app.py CHANGED Viewed

@@ -1,30 +1,51 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer, pipeline
 # Load the model and tokenizer
 model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
-model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Create generation pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def respond(message, history):
-    # Combine current message with previous history
-    input_text = message if not history else history[-1]["value"] + " " + message
-    # Get model's response
-    response = pipe(input_text, max_length=512, truncation=True, num_return_sequences=1)
-    reply = response[0]['generated_text']
-    # Return new message format
-    print(f"Message: {message}")
-    print(f"Reply: {reply}")
-    return [{"role": "bot", "value": reply}]
 # Set up Gradio chat interface
-demo = gr.ChatInterface(fn=respond, title="Qwen2.5-3B-Instruct-openvino", description="Qwen2.5-3B-Instruct-openvino", type='chatbot')
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer, pipeline
 # Load the model and tokenizer identified by model_id
 model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
+model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")  # explicitly specify the device
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Create the text-generation pipeline that respond() calls
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def respond(message, history):
+    try:
+        # Combine the entire conversation history
+        input_text = message
+        if history:
+            input_text = "\n".join([f"User: {h['user']}\nBot: {h['bot']}" for h in history]) + f"\nUser: {message}"
+        # Generate response
+        response = pipe(
+            input_text,
+            max_length=512,
+            truncation=True,
+            num_return_sequences=1,
+            temperature=0.7,  # 控制生成多样性
+            top_p=0.9,        # 控制生成质量
+        )
+        reply = response[0]['generated_text'].strip()
+        # Update history
+        history.append({"user": message, "bot": reply})
+        return history
+    except Exception as e:
+        print(f"Error: {e}")
+        return history + [{"user": message, "bot": "Sorry, something went wrong. Please try again."}]
 # Set up Gradio chat interface
+demo = gr.ChatInterface(
+    fn=respond,
+    title="Qwen2.5-1.5B-Instruct-openvino",
+    description="Chat with Qwen2.5-1.5B-Instruct-openvino model.",
+    examples=["Hello!", "Tell me a joke.", "Explain quantum computing."],
+    retry_btn=None,
+    undo_btn=None,
+    clear_btn="Clear History",
+)
 if __name__ == "__main__":
     demo.launch()