hsuwill000 commited on
Commit
e69a5b4
·
verified ·
1 Parent(s): 68d71c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -13
app.py CHANGED
@@ -1,30 +1,51 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  from optimum.intel import OVModelForCausalLM
4
  from transformers import AutoTokenizer, pipeline
5
 
6
  # Load the model and tokenizer
7
  model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
8
- model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
9
  tokenizer = AutoTokenizer.from_pretrained(model_id)
10
 
11
  # Create generation pipeline
12
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
13
 
14
  def respond(message, history):
15
- # Combine current message with previous history
16
- input_text = message if not history else history[-1]["value"] + " " + message
17
- # Get model's response
18
- response = pipe(input_text, max_length=512, truncation=True, num_return_sequences=1)
19
- reply = response[0]['generated_text']
20
-
21
- # Return new message format
22
- print(f"Message: {message}")
23
- print(f"Reply: {reply}")
24
- return [{"role": "bot", "value": reply}]
 
 
 
 
 
 
 
 
 
 
25
 
 
 
 
 
26
  # Set up Gradio chat interface
27
- demo = gr.ChatInterface(fn=respond, title="Qwen2.5-3B-Instruct-openvino", description="Qwen2.5-3B-Instruct-openvino", type='chatbot')
 
 
 
 
 
 
 
 
28
 
29
  if __name__ == "__main__":
30
  demo.launch()
 
1
  import gradio as gr
 
2
  from optimum.intel import OVModelForCausalLM
3
  from transformers import AutoTokenizer, pipeline
4
 
5
  # Load the model and tokenizer
6
  model_id = "hsuwill000/Qwen2.5-1.5B-Instruct-openvino-8bit"
7
+ model = OVModelForCausalLM.from_pretrained(model_id, device="CPU") # 明确指定设备
8
  tokenizer = AutoTokenizer.from_pretrained(model_id)
9
 
10
  # Create generation pipeline
11
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
12
 
13
  def respond(message, history):
14
+ try:
15
+ # Combine the entire conversation history
16
+ input_text = message
17
+ if history:
18
+ input_text = "\n".join([f"User: {h['user']}\nBot: {h['bot']}" for h in history]) + f"\nUser: {message}"
19
+
20
+ # Generate response
21
+ response = pipe(
22
+ input_text,
23
+ max_length=512,
24
+ truncation=True,
25
+ num_return_sequences=1,
26
+ temperature=0.7, # 控制生成多样性
27
+ top_p=0.9, # 控制生成质量
28
+ )
29
+ reply = response[0]['generated_text'].strip()
30
+
31
+ # Update history
32
+ history.append({"user": message, "bot": reply})
33
+ return history
34
 
35
+ except Exception as e:
36
+ print(f"Error: {e}")
37
+ return history + [{"user": message, "bot": "Sorry, something went wrong. Please try again."}]
38
+
39
  # Set up Gradio chat interface
40
+ demo = gr.ChatInterface(
41
+ fn=respond,
42
+ title="Qwen2.5-1.5B-Instruct-openvino",
43
+ description="Chat with Qwen2.5-1.5B-Instruct-openvino model.",
44
+ examples=["Hello!", "Tell me a joke.", "Explain quantum computing."],
45
+ retry_btn=None,
46
+ undo_btn=None,
47
+ clear_btn="Clear History",
48
+ )
49
 
50
  if __name__ == "__main__":
51
  demo.launch()