KaiChen1998 committed
Commit 7c2502a · Parent(s): 23991fe
Files changed (1):
  1. app.py +44 -25
app.py CHANGED
@@ -71,6 +71,26 @@ def run_llm_reasoning(caption, question, answer):
     output = llm.generate([{"prompt": prompt}], sampling_params=llm_sampling)
     return output[0].outputs[0].text
 
+##########################################
+# Streaming
+##########################################
+mllm_streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
+llm_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
+
+def stream_response(model, inputs, streamer, prompt, gen_kwargs):
+    thread = Thread(target=model.generate, kwargs=dict(
+        streamer=streamer,
+        **inputs,
+        **gen_kwargs
+        )
+    )
+    thread.start()
+
+    generated_text = prompt
+    for new_text in streamer:
+        generated_text += new_text
+        yield generated_text
+
 ##########################################
 # Gradio part
 ##########################################
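The stream_response helper added in this hunk follows the standard transformers streaming recipe: model.generate() runs in a background thread while a TextIteratorStreamer yields decoded text as tokens arrive, so the caller can re-render the UI after every chunk. Below is a minimal, self-contained sketch of the same pattern; the model name, prompt, and generation arguments are placeholders for illustration and are not taken from this commit.

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # hypothetical small model, for illustration only
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    def stream_generate(prompt, max_new_tokens=128):
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
        # generate() blocks, so it runs in a worker thread while this generator drains the streamer
        thread = Thread(target=model.generate, kwargs=dict(streamer=streamer, max_new_tokens=max_new_tokens, **inputs))
        thread.start()
        partial = ""
        for new_text in streamer:
            partial += new_text
            yield partial  # accumulated text; each yield can drive a chatbot refresh

    for text in stream_generate("Explain streaming generation in one sentence."):
        print(text)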
 
@@ -136,43 +156,42 @@ def http_bot(state):
         return
 
     # Retrive prompt
-    logging.info(state.messages)
-    logging.info("=================")
-    logging.info(str(state.messages))
-    logging.info("=================")
-    logging.info(state.messages[-1])
-    logging.info("=================")
-    logging.info(state.messages[-1][-1])
-    logging.info("=================")
-    logging.info(state.messages[-1][-1][0])
-    logging.info("=================")
     prompt = state.messages[-1][-1][0]
     all_images = state.get_images(return_pil=True)[0]
     pload = {"prompt": prompt, "images": f'List of {len(state.get_images())} images: {all_images}'}
     logging.info(f"==== request ====\n{pload}")
 
-    return
-
     # Construct prompt
     cap_msgs, qa_msgs = build_messages(all_images, prompt)
     cap_prompt = processor.apply_chat_template([cap_msgs], tokenize=False, add_generation_prompt=True)
     qa_prompt = processor.apply_chat_template([qa_msgs], tokenize=False, add_generation_prompt=True)
-
     image_tensor, _ = process_vision_info(cap_msgs)
-    tentative_answer = run_mllm_tentative(image_tensor, cap_prompt, qa_prompt)
-    state.append_message(state.roles[1], "# Tentative Response\n\n" + tentative_answer)
-    logging.info("# Tentative Response\n\n" + tentative_answer)
-    yield (state, state.to_gradio_chatbot_public()) + (disable_btn,) * 2
+    cap_inputs = processor(text=[cap_prompt], images=image_tensor, return_tensors="pt").to(mllm.device)
+    qa_inputs = processor(text=[qa_prompt], images=image_tensor, return_tensors="pt").to(mllm.device)
+
+    # Step 1: Tentative Response
+    state.append_message(state.roles[1], "# Tentative Response\n\n▌")
+    try:
+        for generated_text in stream_response(mllm, qa_inputs, mllm_streamer, qa_prompt, mllm_sampling):
+            output = generated_text[len(prompt):].strip()
+            state.messages[-1][-1] = "# Tentative Response\n\n" + output + "▌"
+            yield (state, state.to_gradio_chatbot_public()) + (disable_btn,) * 2
+    except Exception as e:
+        os.system("nvidia-smi")
+        logging.info(traceback.print_exc())
+        state.messages[-1][-1] = server_error_msg
+        yield (state, state.to_gradio_chatbot_public()) + (enable_btn,) * 2
+        return
 
-    caption_text = run_mllm_caption(image_tensor, cap_prompt, qa_prompt)
-    state.append_message(state.roles[1], "# Caption\n\n" + caption_text)
-    logging.info("# Caption\n\n" + caption_text)
-    yield (state, state.to_gradio_chatbot_public()) + (disable_btn,) * 2
+    # caption_text = run_mllm_caption(image_tensor, cap_prompt, qa_prompt)
+    # state.append_message(state.roles[1], "# Caption\n\n" + caption_text)
+    # logging.info("# Caption\n\n" + caption_text)
+    # yield (state, state.to_gradio_chatbot_public()) + (disable_btn,) * 2
 
-    final_answer = run_llm_reasoning(caption_text, QUESTION, tentative_answer)
-    state.append_message(state.roles[1], "# Final Response\n\n" + final_answer)
-    logging.info("# Final Response\n\n" + final_answer)
-    yield (state, state.to_gradio_chatbot_public()) + (enable_btn,) * 2
+    # final_answer = run_llm_reasoning(caption_text, QUESTION, tentative_answer)
+    # state.append_message(state.roles[1], "# Final Response\n\n" + final_answer)
+    # logging.info("# Final Response\n\n" + final_answer)
+    # yield (state, state.to_gradio_chatbot_public()) + (enable_btn,) * 2
 
     ############
     # Layout Markdown
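In the updated http_bot, every streamed chunk is pushed to the front end by yielding the refreshed (state, chatbot, buttons) tuple, relying on Gradio's support for generator event handlers. A standalone sketch of that yield-per-chunk update style follows, with a fake token stream and hypothetical component names; it is illustrative only and not part of this commit.

    import time
    import gradio as gr

    def respond(message, history):
        # history is a list of [user, assistant] pairs; append the new turn with an empty reply
        history = history + [[message, ""]]
        streamed = ""
        for word in "this reply is rendered one chunk at a time".split():
            streamed += word + " "
            history[-1][1] = streamed + "▌"  # cursor marker while streaming, as in the commit
            time.sleep(0.1)
            yield history                    # each yield triggers a chatbot refresh
        history[-1][1] = streamed.strip()
        yield history                        # final update without the cursor

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        textbox = gr.Textbox()
        textbox.submit(respond, [textbox, chatbot], [chatbot])

    if __name__ == "__main__":
        demo.launch()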