Update app.py
app.py CHANGED
@@ -85,6 +85,7 @@ image_description = ""


 def check_hallucination(assertion, citation):
+    print("Entering check_hallucination function")
     api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
     header = {"Authorization": f"Bearer {hf_token}"}
     payload = {"inputs": f"{assertion} [SEP] {citation}"}
@@ -93,6 +94,7 @@ def check_hallucination(assertion, citation):
     output = response.json()
     output = output[0][0]["score"]

+    print(f"check_hallucination output: {output}")
     return f"**hallucination score:** {output}"


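Note: the requests.post call that produces `response` in check_hallucination sits between these two hunks and is not shown. For reference, a minimal self-contained sketch of the same request/response pattern, with the endpoint and payload format taken from the file; the standalone function name and the explicit hf_token parameter are illustrative only:

import requests

# Sketch of the check_hallucination call pattern; app.py reads hf_token from module scope.
def check_hallucination_sketch(assertion, citation, hf_token):
    api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": f"{assertion} [SEP] {citation}"}
    response = requests.post(api_url, headers=headers, json=payload)
    output = response.json()
    # The nested indexing mirrors the file: first candidate of the first input.
    score = output[0][0]["score"]
    return f"**hallucination score:** {score}"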
@@ -104,29 +106,26 @@ headers = {"Authorization": f"Bearer {hf_token}"}

 # Function to query the API
 def query(payload):
+    print("Entering query function")
     response = requests.post(vapi_url, headers=headers, json=payload)
+    print(f"API response: {response.json()}")
     return response.json()

-
 # Function to evaluate hallucination
 def evaluate_hallucination(input1, input2):
-
-    combined_input = f"{input1}
+    print("Entering evaluate_hallucination function")
+    combined_input = f"{input1}[SEP]{input2}"

-    # Make the API call
     output = query({"inputs": combined_input})
-
-    # Extract the score from the output
     score = output[0][0]['score']
-
-    # Generate a label based on the score
+
     if score < 0.5:
         label = f"🔴 High risk. Score: {score:.2f}"
     else:
         label = f"🟢 Low risk. Score: {score:.2f}"
-
-    return label

+    print(f"evaluate_hallucination label: {label}")
+    return label

 def save_audio(audio_input, output_dir="saved_audio"):
     if not os.path.exists(output_dir):
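The commit traces execution with bare print() calls. As a design note, the same traces for evaluate_hallucination could go through the standard-library logging module instead; a minimal sketch of that alternative (not what the commit does; query_fn stands in for the file's query function):

import logging

logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("app")

def evaluate_hallucination_logged(input1, input2, query_fn):
    logger.debug("Entering evaluate_hallucination")
    combined_input = f"{input1}[SEP]{input2}"
    output = query_fn({"inputs": combined_input})
    score = output[0][0]["score"]
    if score < 0.5:
        label = f"🔴 High risk. Score: {score:.2f}"
    else:
        label = f"🟢 Low risk. Score: {score:.2f}"
    logger.debug("evaluate_hallucination label: %s", label)
    return label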
@@ -146,39 +145,40 @@ def save_audio(audio_input, output_dir="saved_audio"):


 def save_image(image_input, output_dir="saved_images"):
+    print("Entering save_image function")
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)

-    # Assuming image_input is a NumPy array
     if isinstance(image_input, np.ndarray):
-        # Convert NumPy arrays to PIL Image
         image = Image.fromarray(image_input)
-
-        # Generate a unique file name
         file_name = f"image_{int(time.time())}.png"
         file_path = os.path.join(output_dir, file_name)
-
-        # Save the image file
         image.save(file_path)

+        print(f"Image saved at: {file_path}")
         return file_path
     else:
         raise ValueError("Invalid image input type")

+
 def process_image(image_file_path):
+    print("Entering process_image function")
     client = Client("https://tonic1-official-qwen-vl-chat.hf.space/--replicas/t5ccx/") # TruEra
     try:
         result = client.predict(
-            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",
+            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",
             image_file_path,
             fn_index=0
         )
+        print(f"Image processing result: {result}")
         return result
     except Exception as e:
+        print(f"Error in process_image: {e}")
         return f"Error occurred during image processing: {e}"


 def process_speech(audio_input, source_language, target_language="English"):
+    print("Entering process_speech function")
     if audio_input is None:
         return "No audio input provided."
     try:
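For reference, the NumPy-to-PNG path in save_image can be exercised on its own; a short usage sketch (the standalone name and the dummy array are illustrative):

import os
import time

import numpy as np
from PIL import Image

# Same conversion as save_image in the diff: NumPy array -> PIL image -> timestamped PNG.
def save_image_sketch(image_input, output_dir="saved_images"):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not isinstance(image_input, np.ndarray):
        raise ValueError("Invalid image input type")
    image = Image.fromarray(image_input)
    file_path = os.path.join(output_dir, f"image_{int(time.time())}.png")
    image.save(file_path)
    return file_path

dummy = np.zeros((64, 64, 3), dtype=np.uint8)  # hypothetical 8-bit RGB input
print(save_image_sketch(dummy))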
@@ -188,11 +188,14 @@ def process_speech(audio_input, source_language, target_language="English"):
             target_language,
             api_name="/s2tt"
         )
+        print(f"Speech processing result: {result}")
         return result
     except Exception as e:
+        print(f"Error in process_speech: {str(e)}")
         return f"Error in speech processing: {str(e)}"

 def convert_text_to_speech(input_text, source_language, target_language):
+    print("Entering convert_text_to_speech function")
     try:
         result = seamless_client.predict(
             input_text,
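process_speech and convert_text_to_speech both call a gradio_client.Client named seamless_client; its construction and the leading arguments of the predict call sit outside these hunks. A hedged sketch of the general call shape (the Space URL and the positional argument order are placeholders, not values from the file):

from gradio_client import Client

def speech_to_text_sketch(audio_file_path, source_language, target_language="English"):
    try:
        # Placeholder URL; app.py constructs seamless_client elsewhere with the real Space.
        client = Client("https://example-user-seamless-m4t.hf.space/")
        # Only the trailing arguments (..., target_language, api_name="/s2tt") are visible
        # in the diff, so this argument list is illustrative.
        result = client.predict(
            audio_file_path,
            source_language,
            target_language,
            api_name="/s2tt",
        )
        return result
    except Exception as e:
        return f"Error in speech processing: {str(e)}"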
@@ -203,8 +206,10 @@ def convert_text_to_speech(input_text, source_language, target_language):
         audio_file_path = result[0] if result else None
         translated_text = result[1] if result else ""

+        print(f"Text-to-speech conversion result: Audio file path: {audio_file_path}, Translated text: {translated_text}")
         return audio_file_path, translated_text
     except Exception as e:
+        print(f"Error in convert_text_to_speech: {str(e)}")
         return None, f"Error in text-to-speech conversion: {str(e)}"

 def query_vectara(text):
@@ -310,8 +315,8 @@ def query_vectara(text):
         return f"Error: {response.status_code}"


-# Functions to Wrap the Prompt Correctly
 def wrap_text(text, width=90):
+    print("Wrapping text...")
     lines = text.split('\n')
     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
     wrapped_text = '\n'.join(wrapped_lines)
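wrap_text's return statement falls just past this hunk; assuming it returns the joined string, a tiny usage check of the per-line wrapping:

import textwrap

# Same per-line wrapping as wrap_text in the diff; returning the joined string is assumed.
def wrap_text_sketch(text, width=90):
    lines = text.split('\n')
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    return '\n'.join(wrapped_lines)

print(wrap_text_sketch("first line with several words\nsecond line", width=12))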
@@ -320,96 +325,82 @@ def wrap_text(text, width=90):
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True).eval()

-# TruEra
 class ChatBot:
     def __init__(self):
         self.history = None

     def predict(self, user_input, system_prompt=""):
+        print("Generating prediction...")
         response, self.history = model.chat(tokenizer, user_input, history=self.history, system=system_prompt)
         return response

 bot = ChatBot()

-# TruEra
 def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
+    print("Processing multimodal prompt...")
     return bot.predict(user_input, system_prompt)

-
-
-    system_prompt = "You are a medical instructor
+def process_summary_with_qwen(summary):
+    print("Processing summary with Qwen...")
+    system_prompt = "You are a medical instructor. Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
     response_text = bot.predict(summary, system_prompt)
     return response_text


+
 def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
     try:
-
+        print("Processing and querying...")
         combined_text = ""
-        markdown_output = ""
-        image_text = ""
-        language_code = None
-
-        # Convert input language to its code
-        if input_language and input_language in languages:
-            language_code = languages[input_language]
-
-        # Debugging print statement
+        markdown_output = ""
+        image_text = ""
         print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")
-
-        # Process image input
+
         if image_input is not None:
-
+            print("Processing image input...")
             image_file_path = save_image(image_input)
             image_text = process_image(image_file_path)
             combined_text += "\n\n**Image Input:**\n" + image_text

-        # Process audio input
         elif audio_input is not None:
+            print("Processing audio input...")
             sample_rate, audio_data = audio_input
             audio_file_path = save_audio(audio_input)
-            audio_text = process_speech(audio_file_path,
+            audio_text = process_speech(audio_file_path, input_language, "English")
             combined_text += "\n\n**Audio Input:**\n" + audio_text

-        # Process text input
         elif text_input is not None and text_input.strip():
+            print("Processing text input...")
             combined_text += "The user asks the following to his health adviser: " + text_input

-        # Check if combined text is empty
         else:
             return "Error: Please provide some input (text, audio, or image)."

-        # Append the original image description in Markdown
         if image_text:
             markdown_output += "\n### Original Image Description\n"
             markdown_output += image_text + "\n"
-
-        # Use the text to query Vectara
-        vectara_response_json = query_vectara(combined_text)

-
+        print("Querying Vectara...")
+        vectara_response_json = query_vectara(combined_text)
         vectara_response = json.loads(vectara_response_json)
         summary = vectara_response.get('summary', 'No summary available')
         sources_info = vectara_response.get('sources', [])

-        # Format Vectara response in Markdown
         markdown_output = "### Vectara Response Summary\n"
         markdown_output += f"* **Summary**: {summary}\n"
         markdown_output += "### Sources Information\n"
         for source in sources_info:
             markdown_output += f"* {source}\n"

-        # Process the summary with Qwen
         final_response = process_summary_with_qwen(summary)

-
-        target_language = "English"
+        print("Converting text to speech...")
+        target_language = "English"
         audio_output, translated_text = convert_text_to_speech(final_response, target_language, input_language)
-
-
+
+        print("Evaluating hallucination...")
         hallucination_label = evaluate_hallucination(final_response, summary)

-        # Add final response and hallucination label to Markdown output
         markdown_output += "\n### Processed Summary with Qwen\n"
         markdown_output += final_response + "\n"
         markdown_output += "\n### Hallucination Evaluation\n"
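The ChatBot wrapper around Qwen-1_8B-Chat can be exercised on its own; a usage sketch restating the class from the hunk (loading the model downloads weights and runs remote code, so this is illustrative rather than part of the Space, and the example prompt is made up):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True
).eval()

# Same wrapper as in the hunk: Qwen's remote-code chat() returns (response, updated_history).
class ChatBot:
    def __init__(self):
        self.history = None

    def predict(self, user_input, system_prompt=""):
        response, self.history = model.chat(
            tokenizer, user_input, history=self.history, system=system_prompt
        )
        return response

bot = ChatBot()
print(bot.predict("Summarize: the patient reports mild fever and cough for two days.",
                  "You are an expert medical analyst:"))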
@@ -418,8 +409,9 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
         markdown_output += translated_text + "\n"

         return markdown_output, audio_output
-
+
     except Exception as e:
+        print(f"Error occurred: {e}")
         return f"Error occurred during processing: {e}. No hallucination evaluation.", None

