import json
import math
import os
from typing import Optional

import cv2
import numpy as np
from smolagents import tool


@tool
def analyze_spatial_relationships(image_path: Optional[str] = None, object_type1: Optional[str] = None, object_type2: Optional[str] = None) -> str:
    """Analyze spatial relationships between objects in an image.

    Reports every pair of detections (one matching each object type) whose
    centers lie within 30% of the image width of each other. Detections are
    read from "<image stem>_detection_data.json"; if that file is missing,
    object detection is run first to create it. Type matching is a
    case-insensitive substring test against each prediction's class name.

    Args:
        image_path: Path to the image file to analyze.
        object_type1: First object type to analyze.
        object_type2: Second object type to analyze.

    Returns:
        A description of spatial relationships between the objects, or a
        message starting with "Error" / "No ..." when nothing can be reported.
    """
    # Fraction of the image width within which two centers count as "close".
    close_ratio = 0.3

    # This is a tool boundary: failures are reported to the caller as text
    # rather than raised, so the broad handler below is deliberate.
    try:
        if not image_path or not os.path.exists(image_path):
            return f"Error: Image path '{image_path}' does not exist."

        # Without both types the search below could never match anything;
        # say so explicitly instead of reporting "No objects matching 'None'".
        if not object_type1 or not object_type2:
            return "Error: Both object_type1 and object_type2 must be provided."

        # The detection results are cached next to the image.
        detection_data_path = f"{os.path.splitext(image_path)[0]}_detection_data.json"
        if not os.path.exists(detection_data_path):
            # Imported lazily so the detector is only loaded when needed.
            from tools.detect_objects import detect_objects_with_roboflow

            detect_objects_with_roboflow(image_path=image_path)
            if not os.path.exists(detection_data_path):
                return "Error: Detection failed. Could not create detection data."

        with open(detection_data_path, "r") as f:
            result = json.load(f)

        # The image itself is only needed for its width.
        img = cv2.imread(image_path)
        if img is None:
            return f"Error: Could not read image at '{image_path}'."
        img_width = img.shape[1]
        close_threshold = img_width * close_ratio

        predictions = result.get("predictions", []) if isinstance(result, dict) else []

        needle1 = object_type1.lower()
        needle2 = object_type2.lower()
        objects1 = []
        objects2 = []
        for index, prediction in enumerate(predictions):
            # Only the class name and center are used; a prediction lacking
            # a center cannot take part in a distance computation.
            if "x" not in prediction or "y" not in prediction:
                continue
            class_name = str(prediction.get("class", ""))
            lowered = class_name.lower()
            # Keep the prediction index so the same detection can be
            # recognized if it matches both object types.
            entry = (index, class_name, prediction["x"], prediction["y"])
            if needle1 in lowered:
                objects1.append(entry)
            if needle2 in lowered:
                objects2.append(entry)

        if not objects1:
            return f"No objects matching '{object_type1}' were found in the image."
        if not objects2:
            return f"No objects matching '{object_type2}' were found in the image."

        close_pairs = []
        seen = set()
        for idx1, class1, x1, y1 in objects1:
            for idx2, class2, x2, y2 in objects2:
                # A detection is never "close to" itself.
                if idx1 == idx2:
                    continue
                # When both detections match both types the pair would show
                # up as (A, B) and (B, A); report it only once.
                key = (idx1, idx2) if idx1 < idx2 else (idx2, idx1)
                if key in seen:
                    continue
                distance = math.hypot(x1 - x2, y1 - y2)
                if distance <= close_threshold:
                    seen.add(key)
                    close_pairs.append((class1, class2, distance, (distance / img_width) * 100))

        definition = (
            f"Close is defined as objects with centers within {close_threshold:.1f} pixels "
            f"(30% of image width) from each other."
        )

        if not close_pairs:
            return f"No instances found where {object_type1} is close to {object_type2}. {definition}"

        lines = [f"Found {len(close_pairs)} instances where {object_type1} is close to {object_type2}:\n"]
        for i, (class1, class2, distance, percent) in enumerate(close_pairs, 1):
            lines.append(
                f"{i}. {class1} is close to {class2} "
                f"(distance: {distance:.1f} pixels, {percent:.1f}% of image width)\n"
            )
        lines.append(f"\n{definition}")
        return "".join(lines)

    except Exception as e:
        return f"Error analyzing spatial relationships: {str(e)}"