prithivMLmods committed
Commit ace15c9 · verified
1 Parent(s): 3541fa7

Update app.py

Files changed (1):
  1. app.py +19 -18
app.py CHANGED
@@ -60,16 +60,19 @@ def glb_to_data_url(glb_path: str) -> str:
     b64_data = base64.b64encode(data).decode("utf-8")
     return f"data:model/gltf-binary;base64,{b64_data}"
 
-def load_audio_file(file):
+def get_file_path(file):
     """
-    Loads an audio file. If file is a string path, it reads directly.
-    Otherwise, it assumes file is a file-like object.
+    Normalize a file input. If the input is a string, assume it is a file path.
+    Otherwise, if the object has a 'name' attribute or key, return that.
     """
     if isinstance(file, str):
-        audio, samplerate = sf.read(file)
+        return file
+    elif hasattr(file, "name"):
+        return file.name
+    elif isinstance(file, dict) and "name" in file:
+        return file["name"]
     else:
-        audio, samplerate = sf.read(BytesIO(file.read()))
-    return audio, samplerate
+        return None
 
 # Model class for Text-to-3D Generation (ShapE)
 
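Note on the new helper: it only resolves a path and leaves reading the file to each caller. A minimal sketch of how it behaves on the input shapes a Gradio file component typically passes (the sample paths and the SimpleNamespace stand-in are illustrative, not from app.py):

from types import SimpleNamespace

def get_file_path(file):
    # Same logic as the helper added in this commit.
    if isinstance(file, str):
        return file
    elif hasattr(file, "name"):
        return file.name
    elif isinstance(file, dict) and "name" in file:
        return file["name"]
    else:
        return None

# Illustrative inputs: a raw path, a temp-file-like object with .name,
# a dict-style upload, and an unsupported value.
print(get_file_path("/tmp/sample.wav"))                        # /tmp/sample.wav
print(get_file_path(SimpleNamespace(name="/tmp/upload.png")))  # /tmp/upload.png
print(get_file_path({"name": "/tmp/upload.glb"}))              # /tmp/upload.glb
print(get_file_path(1234))                                     # None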
@@ -468,15 +471,18 @@ def process_phi4(input_type: str, file, question: str, max_new_tokens: int = 200
         yield "Please upload a file and provide a question."
         return
 
+    file_path = get_file_path(file)
+    if file_path is None:
+        yield "Could not determine the file path."
+        return
+
     if input_type.lower() == "image":
         prompt = f'{user_prompt}<|image_1|>{question}{prompt_suffix}{assistant_prompt}'
-        # Use load_image (as in Qwen2-VL-OCR-2B-Instruct) to handle image file input
-        image = load_image(file)
+        image = Image.open(file_path)
         inputs = phi4_processor(text=prompt, images=image, return_tensors='pt').to(phi4_model.device)
     elif input_type.lower() == "audio":
         prompt = f'{user_prompt}<|audio_1|>{question}{prompt_suffix}{assistant_prompt}'
-        # Use load_audio_file to handle audio file input
-        audio, samplerate = load_audio_file(file)
+        audio, samplerate = sf.read(file_path)
         inputs = phi4_processor(text=prompt, audios=[(audio, samplerate)], return_tensors='pt').to(phi4_model.device)
     else:
         yield "Invalid input type selected."
@@ -565,7 +571,6 @@ def generate(
     # --- Web Search/Visit branch ---
     if text.strip().lower().startswith("@web"):
         web_command = text[len("@web"):].strip()
-        # If the command starts with "visit", then treat the rest as a URL
         if web_command.lower().startswith("visit"):
             url = web_command[len("visit"):].strip()
             yield "🌍 Visiting webpage..."
@@ -573,7 +578,6 @@ def generate(
             content = visitor.forward(url)
             yield content
         else:
-            # Otherwise, treat the rest as a search query.
             query = web_command
             yield "🧀 Performing a web search ..."
             searcher = DuckDuckGoSearchTool()
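The two comments removed in these hunks only restated the @web dispatch, which is unchanged: everything after "@web" is either "visit <url>" or a plain search query. A small sketch of just that parsing step (parse_web_command is a hypothetical name):

def parse_web_command(text):
    # Illustrative: mirrors the prefix handling in the @web branch.
    web_command = text[len("@web"):].strip()
    if web_command.lower().startswith("visit"):
        return ("visit", web_command[len("visit"):].strip())
    return ("search", web_command)

print(parse_web_command("@web visit https://example.com"))  # ('visit', 'https://example.com')
print(parse_web_command("@web latest gradio release"))      # ('search', 'latest gradio release')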
@@ -585,7 +589,6 @@ def generate(
     if text.strip().lower().startswith("@ragent"):
         prompt = text[len("@ragent"):].strip()
         yield "📝 Initiating reasoning chain using Llama mode..."
-        # Pass the current chat history (cleaned) to help inform the chain.
         for partial in ragent_reasoning(prompt, clean_chat_history(chat_history)):
             yield partial
         return
@@ -596,13 +599,12 @@ def generate(
         if not files or len(files) == 0:
             yield "Error: Please attach an image for YOLO object detection."
             return
-        # Use the first attached image
         input_file = files[0]
         try:
             if isinstance(input_file, str):
                 pil_image = Image.open(input_file)
             else:
-                pil_image = input_file
+                pil_image = Image.open(get_file_path(input_file))
         except Exception as e:
             yield f"Error loading image: {str(e)}"
             return
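After this change the YOLO branch no longer assumes a non-string upload is already a PIL image; it resolves a path and opens it. A sketch of that normalization on its own (load_yolo_image is a hypothetical name):

from PIL import Image

def load_yolo_image(input_file):
    # Illustrative: same normalization as the @yolo branch after this commit,
    # assuming non-string uploads expose a .name path (as get_file_path expects).
    path = input_file if isinstance(input_file, str) else getattr(input_file, "name", None)
    if path is None:
        raise ValueError("Could not determine the file path.")
    return Image.open(path)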
@@ -613,7 +615,6 @@ def generate(
 
     # --- Phi-4 Multimodal branch with text streaming ---
     if text.strip().lower().startswith("@phi4"):
-        # Expected format: "@phi4 [image|audio] <your question>"
         parts = text.strip().split(maxsplit=2)
         if len(parts) < 3:
             yield "Error: Please provide input type and a question. Format: '@phi4 [image|audio] <your question>'"
@@ -646,9 +647,9 @@ def generate(
 
     if files:
         if len(files) > 1:
-            images = [load_image(image) for image in files]
+            images = [load_image(get_file_path(image)) for image in files]
         elif len(files) == 1:
-            images = [load_image(files[0])]
+            images = [load_image(get_file_path(files[0]))]
         else:
             images = []
     messages = [{
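The multi-image path now resolves each attachment to a path before loading. A sketch with a stand-in loader in place of the load_image that app.py imports (its origin is not shown in this diff), and illustrative Gradio-style attachments:

from PIL import Image

def load_image(path):
    # Stand-in loader; app.py imports its own load_image elsewhere.
    return Image.open(path)

def get_file_path(file):
    # Same helper as added at the top of this diff.
    if isinstance(file, str):
        return file
    if hasattr(file, "name"):
        return file.name
    if isinstance(file, dict) and "name" in file:
        return file["name"]
    return None

files = ["/tmp/a.png", {"name": "/tmp/b.png"}]  # illustrative attachments
if files:
    if len(files) > 1:
        images = [load_image(get_file_path(image)) for image in files]
    elif len(files) == 1:
        images = [load_image(get_file_path(files[0]))]
else:
    images = []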
 