import json
import math
import os
from typing import Optional

import cv2
import numpy as np
from smolagents import tool


@tool
def analyze_spatial_relationships(image_path: Optional[str] = None, object_type1: Optional[str] = None, object_type2: Optional[str] = None) -> str:
    """Analyze spatial relationships between objects in an image.

    Args:
        image_path: Path to the image file to analyze.
        object_type1: First object type to analyze.
        object_type2: Second object type to analyze.

    Returns:
        A description of spatial relationships between the objects.
    """
    try:
        if not image_path or not os.path.exists(image_path):
            return f"Error: Image path '{image_path}' does not exist."

        if not object_type1 or not object_type2:
            return "Error: Both object_type1 and object_type2 must be provided."

        # Detection results are cached next to the image as <image name>_detection_data.json.
        detection_data_path = f"{os.path.splitext(image_path)[0]}_detection_data.json"

        # If no cached detection data exists yet, run the detector to create it.
        if not os.path.exists(detection_data_path):
            from tools.detect_objects import detect_objects_with_roboflow

            detect_objects_with_roboflow(image_path=image_path)

            if not os.path.exists(detection_data_path):
                return "Error: Detection failed. Could not create detection data."

        with open(detection_data_path, "r") as f:
            result = json.load(f)
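        # Sketch of the detection-data layout this tool assumes (the keys mirror the
        # fields read below); the exact contents depend on detect_objects_with_roboflow:
        #
        # {
        #   "predictions": [
        #     {"class": "person", "x": 412.5, "y": 310.0,
        #      "width": 80.0, "height": 190.0, "confidence": 0.91},
        #     ...
        #   ]
        # }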

        img = cv2.imread(image_path)
        if img is None:
            return f"Error: Could not read image at '{image_path}'."

        img_height, img_width = img.shape[:2]

        # Two objects count as "close" when their centers lie within 30% of the image width.
        close_threshold = img_width * 0.3
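        # Worked example with illustrative numbers: for a 1280-pixel-wide image the
        # threshold is 1280 * 0.3 = 384.0 px, so centers 350 px apart count as close
        # while centers 500 px apart do not.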

        # Collect detections whose class name matches each requested object type.
        objects1 = []
        objects2 = []

        for prediction in result.get("predictions", []):
            class_name = prediction["class"]

            # Skip predictions that do not include center coordinates.
            if "x" not in prediction or "y" not in prediction:
                continue

            entry = {
                "class": class_name,
                "x": prediction["x"],
                "y": prediction["y"],
                "width": prediction["width"],
                "height": prediction["height"],
                "confidence": prediction["confidence"],
            }

            if object_type1.lower() in class_name.lower():
                objects1.append(entry)
            if object_type2.lower() in class_name.lower():
                objects2.append(entry)

        if not objects1:
            return f"No objects matching '{object_type1}' were found in the image."
        if not objects2:
            return f"No objects matching '{object_type2}' were found in the image."

        # Compare every matched pair using the Euclidean distance between object centers.
        close_pairs = []
        for obj1 in objects1:
            for obj2 in objects2:
                distance = math.sqrt(
                    (obj1["x"] - obj2["x"]) ** 2 +
                    (obj1["y"] - obj2["y"]) ** 2
                )

                if distance <= close_threshold:
                    close_pairs.append({
                        "object1": obj1["class"],
                        "object2": obj2["class"],
                        "distance": distance,
                        "distance_percent": (distance / img_width) * 100,
                    })

        if close_pairs:
            output = f"Found {len(close_pairs)} instances where {object_type1} is close to {object_type2}:\n"
            for i, pair in enumerate(close_pairs, 1):
                output += (
                    f"{i}. {pair['object1']} is close to {pair['object2']} "
                    f"(distance: {pair['distance']:.1f} pixels, {pair['distance_percent']:.1f}% of image width)\n"
                )
            output += (
                f"\nClose is defined as objects with centers within {close_threshold:.1f} pixels "
                f"(30% of image width) of each other."
            )
            return output
        else:
            return (
                f"No instances found where {object_type1} is close to {object_type2}. "
                f"Close is defined as objects with centers within {close_threshold:.1f} pixels "
                f"(30% of image width) of each other."
            )

    except Exception as e:
        return f"Error analyzing spatial relationships: {str(e)}"
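

# Minimal local usage sketch, not part of the tool itself. The image path and the object
# types below are illustrative, and it assumes the smolagents @tool wrapper remains
# directly callable with keyword arguments.
if __name__ == "__main__":
    report = analyze_spatial_relationships(
        image_path="sample.jpg",  # hypothetical image on disk
        object_type1="person",    # hypothetical object types to compare
        object_type2="car",
    )
    print(report)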