yejunliang23 commited on
Commit
4e24b8a
·
unverified ·
1 Parent(s): d5b7fec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -62
app.py CHANGED
@@ -9,8 +9,6 @@ from trimesh.exchange.gltf import export_glb
9
  import numpy as np
10
  import tempfile
11
  import copy
12
- from dashscope import MultiModalConversation
13
- import dashscope
14
 
15
  def _remove_image_special(text):
16
  text = text.replace('<ref>', '').replace('</ref>', '')
@@ -21,66 +19,6 @@ def is_video_file(filename):
21
  video_extensions = ['.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.webm', '.mpeg']
22
  return any(filename.lower().endswith(ext) for ext in video_extensions)
23
 
24
- def predict_(_chatbot, task_history):
25
- chat_query = _chatbot[-1][0]
26
- query = task_history[-1][0]
27
- if len(chat_query) == 0:
28
- _chatbot.pop()
29
- task_history.pop()
30
- return _chatbot
31
- print("User: " + _parse_text(query))
32
- history_cp = copy.deepcopy(task_history)
33
- full_response = ""
34
- messages = []
35
- content = []
36
- for q, a in history_cp:
37
- if isinstance(q, (tuple, list)):
38
- if is_video_file(q[0]):
39
- content.append({'video': f'file://{q[0]}'})
40
- else:
41
- content.append({'image': f'file://{q[0]}'})
42
- else:
43
- content.append({'text': q})
44
- messages.append({'role': 'user', 'content': content})
45
- messages.append({'role': 'assistant', 'content': [{'text': a}]})
46
- content = []
47
- messages.pop()
48
- responses = MultiModalConversation.call(
49
- model="Qwen/Qwen2.5-VL-3B-Instruct", messages=messages, stream=True,
50
- )
51
- for response in responses:
52
- if not response.status_code == HTTPStatus.OK:
53
- raise HTTPError(f'response.code: {response.code}\nresponse.message: {response.message}')
54
- response = response.output.choices[0].message.content
55
- response_text = []
56
- for ele in response:
57
- if 'text' in ele:
58
- response_text.append(ele['text'])
59
- elif 'box' in ele:
60
- response_text.append(ele['box'])
61
- response_text = ''.join(response_text)
62
- _chatbot[-1] = (_parse_text(chat_query), _remove_image_special(response_text))
63
- yield _chatbot
64
-
65
- if len(response) > 1:
66
- result_image = response[-1]['result_image']
67
- resp = requests.get(result_image)
68
- os.makedirs(uploaded_file_dir, exist_ok=True)
69
- name = f"tmp{secrets.token_hex(20)}.jpg"
70
- filename = os.path.join(uploaded_file_dir, name)
71
- with open(filename, 'wb') as f:
72
- f.write(resp.content)
73
- response = ''.join(r['box'] if 'box' in r else r['text'] for r in response[:-1])
74
- _chatbot.append((None, (filename,)))
75
- else:
76
- response = response[0]['text']
77
- _chatbot[-1] = (_parse_text(chat_query), response)
78
- full_response = _parse_text(response)
79
-
80
- task_history[-1] = (query, full_response)
81
- print("Qwen2.5-VL-Chat: " + _parse_text(full_response))
82
- yield _chatbot
83
-
84
  def predict(_chatbot, task_history):
85
  chat_query = _chatbot[-1][0]
86
  query = task_history[-1][0]
 
9
  import numpy as np
10
  import tempfile
11
  import copy
 
 
12
 
13
  def _remove_image_special(text):
14
  text = text.replace('<ref>', '').replace('</ref>', '')
 
19
  video_extensions = ['.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.webm', '.mpeg']
20
  return any(filename.lower().endswith(ext) for ext in video_extensions)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def predict(_chatbot, task_history):
23
  chat_query = _chatbot[-1][0]
24
  query = task_history[-1][0]