Tonic committed
Commit db06812 · 1 Parent(s): e86c2c4

Update app.py

Files changed (1): app.py (+47 -116)
app.py CHANGED
@@ -16,16 +16,14 @@ import uuid
 
 
 welcome_message = """
-# 👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷
+# 👋🏻Welcome to ⚕🗣️😷TruEra - MultiMed ⚕🗣️😷
 
-🗣️📝 This is an educational and accessible conversational tool.
+🗣️📝 This is an accessible and multimodal tool optimized using TruEra! We evaluated several configurations, prompts, and models to optimize this application.
 
-### How To Use ⚕🗣️😷MultiMed⚕:
+### How To Use ⚕🗣️😷TruEra - MultiMed⚕:
 
-🗣️📝Interact with ⚕🗣️😷MultiMed⚕ in any language using image, audio or text!
-
-📚🌟💼 that uses [Tonic/stablemed](https://huggingface.co/Tonic/stablemed) and [adept/fuyu-8B](https://huggingface.co/adept/fuyu-8b) with [Vectara](https://huggingface.co/vectara) embeddings + retrieval w/ [Facebook/Seamless-m4t](https://huggingface.co/facebook/hf-seamless-m4t-large) for audio translation & accessibility.
-do [get in touch](https://discord.gg/GWpVpekp). You can also use 😷MultiMed⚕️ on your own data & in your own way by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/TeamTonic/MultiMed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3>
+🗣️📝Interact with ⚕🗣️😷TruEra - MultiMed⚕ in any language using image, audio or text. ⚕🗣️😷TruEra - MultiMed is an accessible application 📚🌟💼 that uses [Qwen/Qwen-1_8B-Chat](https://huggingface.co/Qwen/Qwen-1_8B-Chat) and [Tonic1/Official-Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat) with [Vectara](https://huggingface.co/vectara) embeddings + retrieval w/ [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/hf-seamless-m4t-large) for audio translation & accessibility.
+do [get in touch](https://discord.gg/GWpVpekp). You can also use 😷TruEra MultiMed⚕️ on your own data & in your own way by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/TeamTonic/MultiMed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3>
 ### Join us :
 
 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)"
@@ -75,7 +73,7 @@ languages = {
 # Global variables to hold component references
 components = {}
 dotenv.load_dotenv()
-seamless_client = Client("facebook/seamless_m4t")
+seamless_client = Client("https://facebook-seamless-m4t-v2-large.hf.space/--replicas/j95rl/")
 HuggingFace_Token = os.getenv("HuggingFace_Token")
 hf_token = os.getenv("HuggingFace_Token")
 base_model_id = os.getenv('BASE_MODEL_ID', 'default_base_model_id')
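
Note: the new `seamless_client` is pinned to a specific replica URL (`/--replicas/j95rl/`), which stops resolving whenever the Space restarts under a new replica id. A more durable pattern — a sketch only, not part of this commit, assuming the public Space id `facebook/seamless-m4t-v2-large` — is to connect by Space name and let `gradio_client` resolve the backend, then confirm the named endpoints the rewritten functions below rely on:

```python
# Sketch, not part of this commit: connect by Space id so replica restarts
# don't break the client (assumes the Space is publicly reachable).
from gradio_client import Client

seamless_client = Client("facebook/seamless-m4t-v2-large")

# Print the endpoints the Space actually exposes; the code below assumes
# "/s2tt" (speech-to-text translation) and "/t2st" (text-to-speech translation).
seamless_client.view_api()
```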
@@ -170,99 +168,51 @@ def save_image(image_input, output_dir="saved_images"):
         raise ValueError("Invalid image input type")
 
 def process_image(image_file_path):
-    client = Client("https://adept-fuyu-8b-demo.hf.space/--replicas/pqjvl/")
-
-    """
-    Process the image using the Gradio client.
-    """
+    client = Client("https://tonic1-official-qwen-vl-chat.hf.space/--replicas/xhs6q/")  # TruEra
     try:
-        # Use the Gradio client to predict
-        result = client.predict(
-            image_file_path,  # File path of the image
-            True,  # Enable detailed captioning
-            fn_index=2  # Function index for the Gradio model
+        result = client.predict(
+            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",  # TruEra
+            image_file_path,
+            fn_index=0
         )
         return result
     except Exception as e:
         return f"Error occurred during image processing: {e}"
 
-def process_speech(input_language, audio_input):
-    """
-    processing sound using seamless_m4t
-    """
-    if audio_input is None:
-        return "no audio or audio did not save yet \nplease try again ! "
-    print(f"audio : {audio_input}")
-    print(f"audio type : {type(audio_input)}")
-    out = seamless_client.predict(
-        "S2TT",
-        "file",
-        None,
-        audio_input,
-        "",
-        input_language,
-        "English",
-        api_name="/run",
-    )
-    out = out[1]  # get the text
-    try:
-        return f"{out}"
-    except Exception as e:
-        return f"{e}"
-
-
-def is_base64(s):
-    try:
-        return base64.b64encode(base64.b64decode(s)) == s.encode()
-    except Exception:
-        return False
-
-def convert_text_to_speech(input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
-    client = Client("https://facebook-seamless-m4t.hf.space/--replicas/8cllp/")
-
-    try:
-        # Make a prediction request to the client
-        result = client.predict(
-            "T2ST",
-            "text",  # Since we are doing text-to-speech
-            None,
-            None,
-            input_text,
-            source_language,
-            target_language,
-            api_name="/run"
-        )
-
-        # Print or log the raw API response for inspection
-        print("Raw API Response:", result)
-
-        # Initialize variables
-        translated_text = ""
-        audio_file_path = ""
-
-        # Process the result
-        if result:
-            for item in result:
-                if isinstance(item, str):
-                    # Check if the item is a URL pointing to an audio file or a base64 encoded string
-                    if any(ext in item.lower() for ext in ['.mp3', '.wav', '.ogg']) or is_base64(item):
-                        if not audio_file_path:  # Store only the first audio file path or base64 string
-                            audio_file_path = item
-                    else:
-                        # Concatenate the translated text
-                        translated_text += item + " "
-
-        return audio_file_path, translated_text.strip()
-    except Exception as e:
-        print(f"Error in text-to-speech conversion: {str(e)}")
-        return None, f"Error in text-to-speech conversion: {str(e)}"
+def process_speech(audio_input, source_language, target_language="English"):
+    if audio_input is None:
+        return "No audio input provided."
+    try:
+        # Predict using the client
+        result = seamless_client.predict(
+            audio_input,  # File path of the audio
+            source_language,
+            target_language,
+            api_name="/s2tt"
+        )
+        return result
+    except Exception as e:
+        return f"Error in speech processing: {str(e)}"
+
+def convert_text_to_speech(input_text, source_language, target_language):
+    try:
+        result = seamless_client.predict(
+            input_text,
+            source_language,
+            target_language,
+            api_name="/t2st"
+        )
+        audio_file_path = result[0] if result else None
+        translated_text = result[1] if result else ""
+        return audio_file_path, translated_text
+    except Exception as e:
+        return None, f"Error in text-to-speech conversion: {str(e)}"
 
 
 def query_vectara(text):
     user_message = text
-
-    # Read authentication parameters from the .env file
     customer_id = os.getenv('CUSTOMER_ID')
     corpus_id = os.getenv('CORPUS_ID')
     api_key = os.getenv('API_KEY')
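
For reference, the rewritten wrappers assume specific return shapes from the Space: `/s2tt` is returned as-is (the translated transcript), while `/t2st` is unpacked as `result[0]` (audio file path) and `result[1]` (translated text). A hypothetical smoke test in the app's context — the file name and languages below are placeholders, not from this commit:

```python
# Hypothetical usage sketch; "sample.wav" and the language names are placeholders.
text = process_speech("sample.wav", "French")  # target defaults to "English"
print(text)                                    # translated transcript from /s2tt

# /t2st is assumed to return (audio_file_path, translated_text).
audio_path, translated = convert_text_to_speech("Hello, how are you?", "English", "French")
print(audio_path, translated)
```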
@@ -371,53 +321,38 @@ def wrap_text(text, width=90):
     wrapped_text = '\n'.join(wrapped_lines)
     return wrapped_text
 
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)  # TruEra
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True).eval()
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
 
-def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
-
-    # Combine user input and system prompt
+def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):  # TruEra
     formatted_input = f"{user_input}{system_prompt}"
 
     # Encode the input text
-    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
-    model_inputs = encodeds.to(device)
+    encoded_input = tokenizer(formatted_input, return_tensors="pt").to(device)
 
-    # Generate a response using the model //MODEL UNDEFINED, using peft_model instead.
-    output = peft_model.generate(
-        **model_inputs,
+    # Generate a response using the model
+    output = model.generate(
+        **encoded_input,
         max_length=512,
         use_cache=True,
         early_stopping=True,
-        bos_token_id=peft_model.config.bos_token_id,
-        eos_token_id=peft_model.config.eos_token_id,
-        pad_token_id=peft_model.config.eos_token_id,
+        pad_token_id=tokenizer.eos_token_id,
         temperature=0.1,
         do_sample=True
     )
 
-    # Decode the response
     response_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
     return response_text
 
-
-# Instantiate the Tokenizer
-tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True, padding_side="left")
-# tokenizer = AutoTokenizer.from_pretrained("Tonic/stablemed", trust_remote_code=True, padding_side="left")
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = 'left'
-
-# Load the PEFT model
-peft_config = PeftConfig.from_pretrained("Tonic/stablemed", token=hf_token)
-peft_model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
-peft_model = PeftModel.from_pretrained(peft_model, "Tonic/stablemed", token=hf_token)
-
-
 class ChatBot:
     def __init__(self):
         self.history = []
 
     @staticmethod
-    def doctor(user_input, system_prompt="You are an expert medical analyst:"):
+    def doctor(user_input, system_prompt="You are an expert medical analyst:"):  # TruEra
         formatted_input = f"{system_prompt}{user_input}"
         user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")
         response = peft_model.generate(input_ids=user_input_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)
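
One apparent leftover in this hunk: `ChatBot.doctor` still calls `peft_model.generate`, but the commit deletes the block that defined `peft_model`, so `doctor` would raise a `NameError` at runtime. A sketch of the presumably intended method, routed through the Qwen `model` loaded above (Qwen chat checkpoints loaded with `trust_remote_code=True` also expose a higher-level `model.chat(tokenizer, query, history=None)` helper):

```python
# Sketch, not part of this commit: use the Qwen model loaded above,
# since peft_model is no longer defined after this change.
class ChatBot:
    def __init__(self):
        self.history = []

    @staticmethod
    def doctor(user_input, system_prompt="You are an expert medical analyst:"):
        formatted_input = f"{system_prompt}{user_input}"
        input_ids = tokenizer.encode(formatted_input, return_tensors="pt").to(model.device)
        output = model.generate(input_ids, max_length=512,
                                pad_token_id=tokenizer.eos_token_id)
        return tokenizer.decode(output[0], skip_special_tokens=True)
```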
@@ -428,13 +363,11 @@ class ChatBot:
 bot = ChatBot()
 
 
-def process_summary_with_stablemed(summary):
+def process_summary_with_qwen(summary):  # TruEra
     system_prompt = "You are a medical instructor . Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
     response_text = bot.doctor(summary, system_prompt)
     return response_text
 
-
-# Main function to handle the Gradio interface logic
 
 def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
     try:
@@ -492,18 +425,18 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
         for source in sources_info:
             markdown_output += f"* {source}\n"
 
-        # Process the summary with Stablemed
-        final_response = process_summary_with_stablemed(summary)
+        # Process the summary with Qwen
+        final_response = process_summary_with_qwen(summary)
 
         # Convert translated text to speech and get both audio file and text
-        target_language = "English"  # Set the target language for the speech
+        target_language = "English"
         audio_output, translated_text = convert_text_to_speech(final_response, target_language, input_language)
 
         # Evaluate hallucination
         hallucination_label = evaluate_hallucination(final_response, summary)
 
         # Add final response and hallucination label to Markdown output
-        markdown_output += "\n### Processed Summary with StableMed\n"
+        markdown_output += "\n### Processed Summary with Qwen\n"
         markdown_output += final_response + "\n"
         markdown_output += "\n### Hallucination Evaluation\n"
         markdown_output += f"* **Label**: {hallucination_label}\n"
@@ -517,12 +450,10 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
 
 
 def clear():
-    # Return default values for each component
     return "English", None, None, "", None
 
 
 def create_interface():
-    # with gr.Blocks(theme='ParityError/Anime') as iface:
     with gr.Blocks(theme='ParityError/Anime') as interface:
         # Display the welcome message
         gr.Markdown(welcome_message)