Update app.py
app.py CHANGED
@@ -60,16 +60,19 @@ def glb_to_data_url(glb_path: str) -> str:
     b64_data = base64.b64encode(data).decode("utf-8")
     return f"data:model/gltf-binary;base64,{b64_data}"
 
-def load_audio_file(file):
+def get_file_path(file):
     """
-    ...
-    Otherwise, ...
+    Normalize a file input. If the input is a string, assume it is a file path.
+    Otherwise, if the object has a 'name' attribute or key, return that.
     """
     if isinstance(file, str):
-        ...
+        return file
+    elif hasattr(file, "name"):
+        return file.name
+    elif isinstance(file, dict) and "name" in file:
+        return file["name"]
     else:
-        ...
-    return audio, samplerate
+        return None
 
 # Model class for Text-to-3D Generation (ShapE)
 
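Note: the new helper centralizes how uploads are resolved to a path. Below is a self-contained sketch of how it behaves on the kinds of values a Gradio handler typically receives; the SimpleNamespace object and the dict payload are illustrative stand-ins, not types taken from this app.

    from types import SimpleNamespace

    def get_file_path(file):
        """Normalize a file input to a filesystem path (same logic as the helper above)."""
        if isinstance(file, str):
            return file
        elif hasattr(file, "name"):
            return file.name
        elif isinstance(file, dict) and "name" in file:
            return file["name"]
        else:
            return None

    # A plain path, an object with a .name attribute (e.g. a tempfile-backed upload),
    # a dict payload, and an unsupported value.
    print(get_file_path("/tmp/sample.wav"))                        # /tmp/sample.wav
    print(get_file_path(SimpleNamespace(name="/tmp/upload.png")))  # /tmp/upload.png
    print(get_file_path({"name": "/tmp/clip.mp3", "size": 1024}))  # /tmp/clip.mp3
    print(get_file_path(42))                                       # None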
@@ -468,15 +471,18 @@ def process_phi4(input_type: str, file, question: str, max_new_tokens: int = 200
         yield "Please upload a file and provide a question."
         return
 
+    file_path = get_file_path(file)
+    if file_path is None:
+        yield "Could not determine the file path."
+        return
+
     if input_type.lower() == "image":
         prompt = f'{user_prompt}<|image_1|>{question}{prompt_suffix}{assistant_prompt}'
-        ...
-        image = load_image(file)
+        image = Image.open(file_path)
         inputs = phi4_processor(text=prompt, images=image, return_tensors='pt').to(phi4_model.device)
     elif input_type.lower() == "audio":
         prompt = f'{user_prompt}<|audio_1|>{question}{prompt_suffix}{assistant_prompt}'
-        ...
-        audio, samplerate = load_audio_file(file)
+        audio, samplerate = sf.read(file_path)
         inputs = phi4_processor(text=prompt, audios=[(audio, samplerate)], return_tensors='pt').to(phi4_model.device)
     else:
         yield "Invalid input type selected."
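The branch above now resolves the upload to a concrete path before loading it, replacing the removed load_image/load_audio_file calls. A minimal sketch of that pattern in isolation, assuming sf is the soundfile package (implied by sf.read) and reusing get_file_path from the first hunk; load_phi4_input is a hypothetical name used only for illustration:

    from PIL import Image
    import soundfile as sf  # assumed: the diff's sf.read(...) points at the soundfile package

    def load_phi4_input(input_type: str, file):
        """Resolve an upload to a path, then load it as an image or as (audio, samplerate)."""
        file_path = get_file_path(file)  # helper added in this commit
        if file_path is None:
            raise ValueError("Could not determine the file path.")
        if input_type.lower() == "image":
            return Image.open(file_path)   # PIL image for the <|image_1|> prompt
        elif input_type.lower() == "audio":
            return sf.read(file_path)      # (numpy array, samplerate) tuple
        raise ValueError("Invalid input type selected.")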
@@ -565,7 +571,6 @@ def generate(
     # --- Web Search/Visit branch ---
     if text.strip().lower().startswith("@web"):
         web_command = text[len("@web"):].strip()
-        # If the command starts with "visit", then treat the rest as a URL
         if web_command.lower().startswith("visit"):
             url = web_command[len("visit"):].strip()
             yield "Visiting webpage..."
@@ -573,7 +578,6 @@ def generate(
             content = visitor.forward(url)
             yield content
         else:
-            # Otherwise, treat the rest as a search query.
             query = web_command
             yield "Performing a web search ..."
             searcher = DuckDuckGoSearchTool()
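The two comments removed above described the @web routing; the behaviour itself is unchanged. A small sketch of that routing with the tool calls left out (DuckDuckGoSearchTool and the page visitor are the app's own tools; route_web_command is a hypothetical name for illustration):

    def route_web_command(text: str):
        """Split an '@web ...' message into an (action, payload) pair."""
        web_command = text[len("@web"):].strip()
        if web_command.lower().startswith("visit"):
            # "@web visit <url>" -> fetch that page
            return "visit", web_command[len("visit"):].strip()
        # anything else is treated as a search query
        return "search", web_command

    print(route_web_command("@web visit https://example.com"))  # ('visit', 'https://example.com')
    print(route_web_command("@web latest gradio release"))      # ('search', 'latest gradio release')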
@@ -585,7 +589,6 @@ def generate(
     if text.strip().lower().startswith("@ragent"):
         prompt = text[len("@ragent"):].strip()
         yield "Initiating reasoning chain using Llama mode..."
-        # Pass the current chat history (cleaned) to help inform the chain.
         for partial in ragent_reasoning(prompt, clean_chat_history(chat_history)):
             yield partial
         return
@@ -596,13 +599,12 @@ def generate(
         if not files or len(files) == 0:
             yield "Error: Please attach an image for YOLO object detection."
             return
-        # Use the first attached image
         input_file = files[0]
         try:
             if isinstance(input_file, str):
                 pil_image = Image.open(input_file)
             else:
-                pil_image = input_file
+                pil_image = Image.open(get_file_path(input_file))
         except Exception as e:
             yield f"Error loading image: {str(e)}"
             return
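The YOLO branch now routes non-string uploads through get_file_path as well. A standalone sketch of that loading step, with the helper's logic inlined so the snippet runs on its own (load_first_image is a hypothetical name):

    from PIL import Image

    def load_first_image(files):
        """Open the first attached file as a PIL image, accepting paths or upload objects."""
        if not files:
            raise ValueError("Please attach an image.")
        input_file = files[0]
        if isinstance(input_file, str):
            path = input_file
        else:
            # inlined get_file_path logic for non-string uploads
            path = getattr(input_file, "name", None) or (
                input_file.get("name") if isinstance(input_file, dict) else None
            )
        if path is None:
            raise ValueError("Could not determine the file path.")
        return Image.open(path)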
@@ -613,7 +615,6 @@ def generate(
 
     # --- Phi-4 Multimodal branch with text streaming ---
     if text.strip().lower().startswith("@phi4"):
-        # Expected format: "@phi4 [image|audio] <your question>"
         parts = text.strip().split(maxsplit=2)
         if len(parts) < 3:
             yield "Error: Please provide input type and a question. Format: '@phi4 [image|audio] <your question>'"
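For the '@phi4 [image|audio] <your question>' format that the removed comment documented, split(maxsplit=2) yields exactly the three pieces the branch checks for. A quick illustration (parse_phi4_command is a hypothetical name):

    def parse_phi4_command(text: str):
        """Parse '@phi4 <input_type> <question>' into its parts."""
        parts = text.strip().split(maxsplit=2)
        if len(parts) < 3:
            raise ValueError("Format: '@phi4 [image|audio] <your question>'")
        _tag, input_type, question = parts
        return input_type, question

    print(parse_phi4_command("@phi4 image What is shown in this picture?"))
    # ('image', 'What is shown in this picture?')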
@@ -646,9 +647,9 @@ def generate(
 
     if files:
         if len(files) > 1:
-            images = [load_image(image) for image in files]
+            images = [load_image(get_file_path(image)) for image in files]
         elif len(files) == 1:
-            images = [load_image(files[0])]
+            images = [load_image(get_file_path(files[0]))]
         else:
             images = []
         messages = [{
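Both branches above now funnel attachments through get_file_path before load_image, and the multi-file and single-file cases produce the same shape of result. A compact equivalent, written as a sketch with the loader and helper passed in so it stays independent of the app's imports:

    def collect_images(files, load_image, get_file_path):
        """Load every attachment as an image; returns [] when nothing is attached."""
        if not files:
            return []
        return [load_image(get_file_path(f)) for f in files]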