Tonic committed on
Commit 7311b6e · 1 Parent(s): 3853615

Update app.py

Files changed (1)
  1. app.py +45 -53
app.py CHANGED
@@ -85,6 +85,7 @@ image_description = ""
 
 
 def check_hallucination(assertion, citation):
+    print("Entering check_hallucination function")
     api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
     header = {"Authorization": f"Bearer {hf_token}"}
     payload = {"inputs": f"{assertion} [SEP] {citation}"}
@@ -93,6 +94,7 @@ def check_hallucination(assertion, citation):
     output = response.json()
     output = output[0][0]["score"]

+    print(f"check_hallucination output: {output}")
     return f"**hallucination score:** {output}"


@@ -104,29 +106,26 @@ headers = {"Authorization": f"Bearer {hf_token}"}

 # Function to query the API
 def query(payload):
+    print("Entering query function")
     response = requests.post(vapi_url, headers=headers, json=payload)
+    print(f"API response: {response.json()}")
     return response.json()

-
 # Function to evaluate hallucination
 def evaluate_hallucination(input1, input2):
-    # Combine the inputs
-    combined_input = f"{input1}. {input2}"
+    print("Entering evaluate_hallucination function")
+    combined_input = f"{input1}[SEP]{input2}"

-    # Make the API call
     output = query({"inputs": combined_input})
-
-    # Extract the score from the output
     score = output[0][0]['score']
-
-    # Generate a label based on the score
+
     if score < 0.5:
         label = f"🔴 High risk. Score: {score:.2f}"
     else:
         label = f"🟢 Low risk. Score: {score:.2f}"
-
-    return label

+    print(f"evaluate_hallucination label: {label}")
+    return label

 def save_audio(audio_input, output_dir="saved_audio"):
     if not os.path.exists(output_dir):
@@ -146,39 +145,40 @@ def save_audio(audio_input, output_dir="saved_audio"):


 def save_image(image_input, output_dir="saved_images"):
+    print("Entering save_image function")
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)

-    # Assuming image_input is a NumPy array
     if isinstance(image_input, np.ndarray):
-        # Convert NumPy arrays to PIL Image
         image = Image.fromarray(image_input)
-
-        # Generate a unique file name
         file_name = f"image_{int(time.time())}.png"
         file_path = os.path.join(output_dir, file_name)
-
-        # Save the image file
         image.save(file_path)

+        print(f"Image saved at: {file_path}")
         return file_path
     else:
         raise ValueError("Invalid image input type")

+
 def process_image(image_file_path):
+    print("Entering process_image function")
     client = Client("https://tonic1-official-qwen-vl-chat.hf.space/--replicas/t5ccx/") # TruEra
     try:
         result = client.predict(
-            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.", # TruEra
+            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",
             image_file_path,
             fn_index=0
         )
+        print(f"Image processing result: {result}")
         return result
     except Exception as e:
+        print(f"Error in process_image: {e}")
         return f"Error occurred during image processing: {e}"


 def process_speech(audio_input, source_language, target_language="English"):
+    print("Entering process_speech function")
     if audio_input is None:
         return "No audio input provided."
     try:
@@ -188,11 +188,14 @@ def process_speech(audio_input, source_language, target_language="English"):
             target_language,
             api_name="/s2tt"
         )
+        print(f"Speech processing result: {result}")
         return result
     except Exception as e:
+        print(f"Error in process_speech: {str(e)}")
         return f"Error in speech processing: {str(e)}"

 def convert_text_to_speech(input_text, source_language, target_language):
+    print("Entering convert_text_to_speech function")
     try:
         result = seamless_client.predict(
             input_text,
@@ -203,8 +206,10 @@ def convert_text_to_speech(input_text, source_language, target_language):
         audio_file_path = result[0] if result else None
         translated_text = result[1] if result else ""

+        print(f"Text-to-speech conversion result: Audio file path: {audio_file_path}, Translated text: {translated_text}")
         return audio_file_path, translated_text
     except Exception as e:
+        print(f"Error in convert_text_to_speech: {str(e)}")
         return None, f"Error in text-to-speech conversion: {str(e)}"

 def query_vectara(text):
@@ -310,8 +315,8 @@ def query_vectara(text):
         return f"Error: {response.status_code}"


-# Functions to Wrap the Prompt Correctly
 def wrap_text(text, width=90):
+    print("Wrapping text...")
     lines = text.split('\n')
     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
     wrapped_text = '\n'.join(wrapped_lines)
@@ -320,96 +325,82 @@ def wrap_text(text, width=90):
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True).eval()

-# TruEra
 class ChatBot:
     def __init__(self):
         self.history = None

     def predict(self, user_input, system_prompt=""):
+        print("Generating prediction...")
         response, self.history = model.chat(tokenizer, user_input, history=self.history, system=system_prompt)
         return response

 bot = ChatBot()

-# TruEra
 def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
+    print("Processing multimodal prompt...")
     return bot.predict(user_input, system_prompt)

-
-def process_summary_with_qwen(summary): # TruEra
-    system_prompt = "You are a medical instructor . Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
+def process_summary_with_qwen(summary):
+    print("Processing summary with Qwen...")
+    system_prompt = "You are a medical instructor. Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
     response_text = bot.predict(summary, system_prompt)
     return response_text


+
 def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
     try:
-
+        print("Processing and querying...")
         combined_text = ""
-        markdown_output = ""
-        image_text = ""
-        language_code = None
-
-        # Convert input language to its code
-        if input_language and input_language in languages:
-            language_code = languages[input_language]
-
-        # Debugging print statement
+        markdown_output = ""
+        image_text = ""
         print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")
-
-        # Process image input
+
         if image_input is not None:
-            # Convert image_input to a file path
+            print("Processing image input...")
             image_file_path = save_image(image_input)
             image_text = process_image(image_file_path)
             combined_text += "\n\n**Image Input:**\n" + image_text

-        # Process audio input
         elif audio_input is not None:
+            print("Processing audio input...")
             sample_rate, audio_data = audio_input
             audio_file_path = save_audio(audio_input)
-            audio_text = process_speech(audio_file_path, language_code, "English")
+            audio_text = process_speech(audio_file_path, input_language, "English")
            combined_text += "\n\n**Audio Input:**\n" + audio_text

-        # Process text input
         elif text_input is not None and text_input.strip():
+            print("Processing text input...")
             combined_text += "The user asks the following to his health adviser: " + text_input

-        # Check if combined text is empty
         else:
             return "Error: Please provide some input (text, audio, or image)."

-        # Append the original image description in Markdown
         if image_text:
             markdown_output += "\n### Original Image Description\n"
             markdown_output += image_text + "\n"
-
-        # Use the text to query Vectara
-        vectara_response_json = query_vectara(combined_text)

-        # Parse the Vectara response
+        print("Querying Vectara...")
+        vectara_response_json = query_vectara(combined_text)
         vectara_response = json.loads(vectara_response_json)
         summary = vectara_response.get('summary', 'No summary available')
         sources_info = vectara_response.get('sources', [])

-        # Format Vectara response in Markdown
         markdown_output = "### Vectara Response Summary\n"
         markdown_output += f"* **Summary**: {summary}\n"
         markdown_output += "### Sources Information\n"
         for source in sources_info:
             markdown_output += f"* {source}\n"

-        # Process the summary with Qwen
         final_response = process_summary_with_qwen(summary)

-        # Convert translated text to speech and get both audio file and text
-        target_language = "English"
+        print("Converting text to speech...")
+        target_language = "English"
         audio_output, translated_text = convert_text_to_speech(final_response, target_language, input_language)
-
-        # Evaluate hallucination
+
+        print("Evaluating hallucination...")
         hallucination_label = evaluate_hallucination(final_response, summary)

-        # Add final response and hallucination label to Markdown output
         markdown_output += "\n### Processed Summary with Qwen\n"
         markdown_output += final_response + "\n"
         markdown_output += "\n### Hallucination Evaluation\n"
@@ -418,8 +409,9 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
         markdown_output += translated_text + "\n"

         return markdown_output, audio_output
-
+
     except Exception as e:
+        print(f"Error occurred: {e}")
         return f"Error occurred during processing: {e}. No hallucination evaluation.", None

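
For reference, the `[SEP]`-joined payload that this commit introduces in evaluate_hallucination matches the request format already used by check_hallucination against the hosted vectara/hallucination_evaluation_model endpoint. Below is a minimal standalone sketch of that request pattern; the endpoint URL and payload shape are taken from the code above, hf_token is assumed to hold a valid Hugging Face API token, and the nested output[0][0]["score"] indexing mirrors how this app reads the response rather than a documented guarantee.

import requests

hf_token = "hf_..."  # assumption: a valid Hugging Face API token
api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
headers = {"Authorization": f"Bearer {hf_token}"}

def hallucination_score(assertion: str, citation: str) -> float:
    # Same "[SEP]"-joined payload used by check_hallucination / evaluate_hallucination above
    payload = {"inputs": f"{assertion} [SEP] {citation}"}
    response = requests.post(api_url, headers=headers, json=payload)
    response.raise_for_status()
    output = response.json()
    # The app reads output[0][0]["score"]; response shape assumed from the code above
    return output[0][0]["score"]

if __name__ == "__main__":
    score = hallucination_score(
        "The patient should rest and drink fluids.",
        "Rest and hydration are commonly recommended for mild viral infections.",
    )
    # Same thresholding convention as evaluate_hallucination above
    print(f"🟢 Low risk. Score: {score:.2f}" if score >= 0.5 else f"🔴 High risk. Score: {score:.2f}")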