import itertools
import json
import os
from typing import Any, Dict, List, Optional, Tuple

from scene_type import SCENE_TYPES
from scene_detail_templates import SCENE_DETAIL_TEMPLATES
from object_template_fillers import OBJECT_TEMPLATE_FILLERS
from activity_templates import ACTIVITY_TEMPLATES
from safety_templates import SAFETY_TEMPLATES
# NOTE(review): "confifence" looks like a misspelling of "confidence", but the
# name has to match the module on disk, so it is kept as-is.
from confifence_templates import CONFIDENCE_TEMPLATES


class SceneDescriptor:
    """
    Generates natural language descriptions of scenes.
    Handles scene descriptions, activity inference, and safety concerns identification.
    """

    def __init__(self, scene_types=None, object_categories=None):
        """
        Initialize the scene descriptor.

        Args:
            scene_types: Dictionary of scene type definitions. An empty
                dictionary is used when this is omitted or falsy.
            object_categories: Optional mapping of category name to a list of
                class ids. When omitted or falsy, the categories are taken
                from ``_load_json_data("object_categories")`` if such a loader
                exists and returns something truthy, otherwise from the
                built-in defaults below.
        """
        self.scene_types = scene_types or {}
        self.SCENE_TYPES = scene_types or {}

        if object_categories:
            self.OBJECT_CATEGORIES = object_categories
        else:
            # Load from JSON when a loader is available, otherwise use the
            # defaults. The loader is looked up defensively because it is not
            # defined alongside the methods here; without the guard a missing
            # loader would raise AttributeError instead of falling back.
            loader = getattr(self, "_load_json_data", None)
            loaded = loader("object_categories") if callable(loader) else None
            self.OBJECT_CATEGORIES = loaded or {
                "furniture": [56, 57, 58, 59, 60, 61],
                "electronics": [62, 63, 64, 65, 66, 67, 68, 69, 70],
                "kitchen_items": [39, 40, 41, 42, 43, 44, 45],
                "food": [46, 47, 48, 49, 50, 51, 52, 53, 54, 55],
                "vehicles": [1, 2, 3, 4, 5, 6, 7, 8],
                "personal_items": [24, 25, 26, 27, 28, 73, 78, 79],
            }

        # Load all template data.
        self._load_templates()

    def _load_templates(self):
        """Bind the imported template constants to instance attributes."""
        self.confidence_templates = CONFIDENCE_TEMPLATES
        self.scene_detail_templates = SCENE_DETAIL_TEMPLATES
        self.object_template_fillers = OBJECT_TEMPLATE_FILLERS
        self.safety_templates = SAFETY_TEMPLATES
        self.activity_templates = ACTIVITY_TEMPLATES

    def _initialize_fallback_templates(self):
        """
        Replace every template attribute with a minimal built-in fallback.

        Intended for use only when templates cannot be loaded from files or
        imports. Nothing in the methods defined alongside it calls it.
        """
        self.confidence_templates = {
            "high": "{description} {details}",
            "medium": "This appears to be {description} {details}",
            "low": "This might be {description}, but the confidence is low. {details}",
        }

        # Only the most basic templates are provided as a fallback.
        self.scene_detail_templates = {
            "default": ["A space with various objects."]
        }

        self.object_template_fillers = {
            "default": ["various items"]
        }

        self.safety_templates = {
            "general": "Pay attention to {safety_element}."
        }

        self.activity_templates = {
            "default": ["General activity"]
        }

    def _get_alternative_scenes(self, scene_scores: Dict[str, float],
                                threshold: float, top_k: int = 2) -> List[Dict]:
        """
        Get alternative scene interpretations with their scores.

        Args:
            scene_scores: Dictionary of scene type scores
            threshold: Minimum confidence threshold
            top_k: Number of alternatives to consider after the best match

        Returns:
            List of dictionaries with keys "type", "name" and "confidence",
            ordered by descending score. Only candidates whose score reaches
            ``threshold`` are included, so fewer than ``top_k`` may be returned.
        """
        # Sort scenes by score in descending order.
        ranked = sorted(scene_scores.items(), key=lambda item: item[1], reverse=True)

        # Skip the first one (best match) and take the next top_k.
        return [
            {
                "type": scene_type,
                "name": self.SCENE_TYPES.get(scene_type, {}).get("name", "Unknown"),
                "confidence": score,
            }
            for scene_type, score in ranked[1:1 + top_k]
            if score >= threshold
        ]

    def _infer_possible_activities(self, scene_type: str, detected_objects: List[Dict]) -> List[str]:
        """
        Infer possible activities based on scene type and detected objects.

        Args:
            scene_type: Identified scene type
            detected_objects: List of detected objects; each must provide a
                "class_id" entry

        Returns:
            List of possible activities without duplicates, in the order in
            which they were first inferred.
        """
        activities: List[str] = []

        if scene_type.startswith("aerial_view_"):
            if scene_type in ("aerial_view_intersection",
                              "aerial_view_commercial_area",
                              "aerial_view_plaza"):
                # Use the predefined activities for this aerial scene.
                activities.extend(self.activity_templates.get(scene_type, []))

                if scene_type == "aerial_view_intersection":
                    # Add activities specific to pedestrians and vehicles.
                    pedestrians = [obj for obj in detected_objects if obj["class_id"] == 0]
                    vehicles = [obj for obj in detected_objects
                                if obj["class_id"] in [2, 5, 7]]  # Car, bus, truck
                    if pedestrians and vehicles:
                        activities.append("Waiting for an opportunity to cross the street")
                        activities.append("Obeying traffic signals")
            else:
                # Handle other aerial-view scenes that are not explicitly defined.
                activities.extend([
                    "Street crossing",
                    "Waiting for signals",
                    "Following traffic rules",
                    "Pedestrian movement",
                ])

        # Template activities for the scene type. For aerial scenes this may
        # repeat entries added above; duplicates are dropped before returning.
        if scene_type in self.activity_templates:
            activities.extend(self.activity_templates[scene_type])
        elif "default" in self.activity_templates:
            activities.extend(self.activity_templates["default"])

        detected_class_ids = [obj["class_id"] for obj in detected_objects]

        # Add activities based on specific object combinations.
        if 62 in detected_class_ids and 57 in detected_class_ids:  # TV and sofa
            activities.append("Watching shows or movies")

        if 63 in detected_class_ids:  # Laptop
            activities.append("Using a computer/laptop")

        if 67 in detected_class_ids:  # Cell phone
            activities.append("Using a mobile phone")

        if 73 in detected_class_ids:  # Book
            activities.append("Reading")

        if any(food_id in detected_class_ids for food_id in range(46, 56)):
            activities.append("Eating or preparing food")

        # Person-specific activities.
        if 0 in detected_class_ids:  # Person
            if any(vehicle in detected_class_ids for vehicle in [1, 2, 3, 5, 7]):  # Vehicles
                activities.append("Commuting or traveling")

            if 16 in detected_class_ids:  # Dog
                activities.append("Walking a dog")

            if 24 in detected_class_ids or 26 in detected_class_ids:  # Backpack or handbag
                activities.append("Carrying personal items")

        # Remove duplicates while keeping first-seen order. A plain set would
        # make the result order vary between runs (string hash randomisation).
        return list(dict.fromkeys(activities))

    def _identify_safety_concerns(self, detected_objects: List[Dict], scene_type: str) -> List[str]:
        """
        Identify potential safety concerns based on objects and scene type.

        General checks (sharp objects, traffic, crowding, kitchen equipment,
        elevated containers) always run. One scene-specific helper is then
        run if ``scene_type`` matches an entry in the dispatch table below.

        Args:
            detected_objects: List of detected objects. Each must provide
                "class_id"; some checks additionally read "region",
                "normalized_area", "class_name" and "normalized_center".
            scene_type: Identified scene type

        Returns:
            List of potential safety concerns without duplicates, in the
            order in which they were identified.
        """
        concerns: List[str] = []
        detected_class_ids = [obj["class_id"] for obj in detected_objects]

        # General safety concerns.
        if 42 in detected_class_ids or 43 in detected_class_ids:  # Fork or knife
            concerns.append("Sharp utensils present")

        if 76 in detected_class_ids:  # Scissors
            concerns.append("Cutting tools present")

        # Traffic-related concerns.
        if scene_type in ["city_street", "parking_lot"]:
            if 0 in detected_class_ids:  # Person
                if any(vehicle in detected_class_ids for vehicle in [2, 3, 5, 7, 8]):  # Vehicles
                    concerns.append("Pedestrians near vehicles")

            if 9 in detected_class_ids:  # Traffic light
                concerns.append("Monitor traffic signals")

        # Identify crowded scenes.
        person_count = detected_class_ids.count(0)
        if person_count > 5:
            concerns.append(f"Crowded area with multiple people ({person_count})")

        # Scene-specific concerns.
        if scene_type == "kitchen":
            if 68 in detected_class_ids or 69 in detected_class_ids:  # Microwave or oven
                concerns.append("Hot cooking equipment")

        # Potentially unstable objects: sizeable containers in the top regions.
        for obj in detected_objects:
            if obj["class_id"] in [39, 40, 41, 45]:  # Bottle, wine glass, cup, bowl
                if (obj["region"] in ["top_left", "top_center", "top_right"]
                        and obj["normalized_area"] > 0.05):
                    concerns.append(f"Elevated {obj['class_name']} might be unstable")

        # Concerns for additional scene types; at most one helper applies.
        scene_checks = {
            "upscale_dining": self._upscale_dining_concerns,
            "asian_commercial_street": self._commercial_street_concerns,
            "financial_district": self._financial_district_concerns,
            "urban_intersection": self._urban_intersection_concerns,
            "transit_hub": self._transit_hub_concerns,
            "shopping_district": self._shopping_district_concerns,
        }
        scene_check = scene_checks.get(scene_type)
        if scene_check is not None:
            concerns.extend(scene_check(detected_objects, detected_class_ids))

        # Several objects of the same class can trigger the same message
        # (e.g. two elevated cups); report each concern once, in order.
        return list(dict.fromkeys(concerns))

    @staticmethod
    def _upscale_dining_concerns(detected_objects: List[Dict],
                                 detected_class_ids: List[Any]) -> List[str]:
        """Return concerns for an upscale dining scene (glassware, candles, crowded table)."""
        concerns: List[str] = []

        # Check for fragile items.
        if 40 in detected_class_ids:  # Wine glass
            concerns.append("Fragile glassware present")

        # Candles cannot be detected directly; a cup (which might include a
        # candle holder) in a formal dining scene is used as a hint.
        if 41 in detected_class_ids:  # Cup
            concerns.append("Possible lit candles or decorative items requiring care")

        # Check for an overcrowded table: count tableware/food objects that
        # share a region with the first detected dining table.
        tables = [obj for obj in detected_objects if obj["class_id"] == 60]  # Dining table
        if tables:
            table_region = tables[0]["region"]
            items_on_table = sum(
                1
                for obj in detected_objects
                if obj["class_id"] in range(39, 56) and obj["region"] == table_region
            )
            if items_on_table > 8:
                concerns.append("Dining table has multiple items which should be handled with care")

        return concerns

    @staticmethod
    def _commercial_street_concerns(detected_objects: List[Dict],
                                    detected_class_ids: List[Any]) -> List[str]:
        """Return concerns for an Asian commercial street (crowding, two-wheelers, walking surface)."""
        concerns: List[str] = []
        people = [obj for obj in detected_objects if obj["class_id"] == 0]  # Person

        # Check for crowded walkways: with more than three people, use the
        # average pairwise distance between their centres as a simplified
        # density measure.
        if len(people) > 3:
            centers = [obj["normalized_center"] for obj in people]
            distances = [
                ((b[0] - a[0]) ** 2 + (b[1] - a[1]) ** 2) ** 0.5
                for a, b in itertools.combinations(centers, 2)
            ]
            if sum(distances) / len(distances) < 0.1:  # Close proximity
                concerns.append("Crowded walkway with limited personal space")

        # Check for motorcycles/bicycles near pedestrians.
        if people and (1 in detected_class_ids or 3 in detected_class_ids):
            concerns.append("Two-wheeled vehicles in pedestrian areas")

        # Trip hazards cannot be detected directly; people located in a
        # "bottom" region are taken as a hint that they might be walking on
        # uneven surfaces.
        if any("bottom" in obj["region"] for obj in people):
            concerns.append("Potential uneven walking surfaces in commercial area")

        return concerns

    @staticmethod
    def _financial_district_concerns(detected_objects: List[Dict],
                                     detected_class_ids: List[Any]) -> List[str]:
        """Return concerns for a financial district (traffic volume, pedestrians, signals, lighting)."""
        concerns: List[str] = []

        # Check for heavy traffic conditions.
        vehicle_count = sum(1 for class_id in detected_class_ids
                            if class_id in [2, 5, 7])  # Car, bus, truck
        if vehicle_count > 5:
            concerns.append("Heavy vehicle traffic in urban area")

        # Check for pedestrians crossing busy streets.
        if 0 in detected_class_ids and any(
                vehicle in detected_class_ids for vehicle in [2, 3, 5, 7]):
            concerns.append("Pedestrians navigating busy urban traffic")

        # Check for traffic signals.
        if 9 in detected_class_ids:  # Traffic light
            concerns.append("Observe traffic signals when navigating this area")
        elif vehicle_count > 3:
            # No traffic light detected although the area is busy.
            concerns.append("Busy traffic area potentially without visible traffic signals in view")

        # Time-of-day considerations. There is no direct time data here.
        # NOTE(review): this only tests whether any object carries a
        # "lighting_conditions" key, not what its value is - confirm intent.
        has_vehicles = any(class_id in [2, 5, 7] for class_id in detected_class_ids)
        if has_vehicles and any("lighting_conditions" in obj for obj in detected_objects):
            concerns.append("Reduced visibility conditions during evening commute")

        return concerns

    @staticmethod
    def _urban_intersection_concerns(detected_objects: List[Dict],
                                     detected_class_ids: List[Any]) -> List[str]:
        """Return concerns for an urban intersection (crossing pedestrians, signals, vehicle load)."""
        concerns: List[str] = []
        pedestrians = [obj for obj in detected_objects if obj["class_id"] == 0]
        vehicles = [obj for obj in detected_objects if obj["class_id"] in [2, 3, 5, 7]]
        has_traffic_light = 9 in detected_class_ids

        # Simplified crossing check: at least three pedestrians spread over
        # at least two different regions.
        if len(pedestrians) >= 3:
            pedestrian_regions = {obj["region"] for obj in pedestrians}
            if len(pedestrian_regions) >= 2:
                concerns.append("Multiple pedestrians crossing the intersection")

        # Check for traffic signal observation.
        if has_traffic_light:
            concerns.append("Observe traffic signals when crossing")

        # Check for busy intersection.
        if len(vehicles) > 3:
            concerns.append("Busy intersection with multiple vehicles")

        # Check for pedestrians potentially jay-walking: people but no traffic lights.
        if pedestrians and not has_traffic_light:
            concerns.append("Pedestrians should use designated crosswalks")

        # This would be better with actual lighting data; pedestrian count is
        # used as a stand-in.
        if len(pedestrians) > 5:
            concerns.append("High pedestrian density at crossing points")

        return concerns

    @staticmethod
    def _transit_hub_concerns(detected_objects: List[Dict],
                              detected_class_ids: List[Any]) -> List[str]:
        """Return concerns for a transit hub (crowding, luggage, transit vehicles)."""
        concerns: List[str] = []

        # Transit areas such as train stations or bus terminals.
        if detected_class_ids.count(0) > 8:  # Person
            concerns.append("Crowded transit area requiring careful navigation")

        # Check for luggage/bags that could be trip hazards.
        if 24 in detected_class_ids or 28 in detected_class_ids:  # Backpack or suitcase
            concerns.append("Luggage and personal items may create obstacles")

        # Public transportation vehicles.
        if any(vehicle in detected_class_ids for vehicle in [5, 6, 7]):  # Bus, train, truck
            concerns.append("Stay clear of arriving and departing transit vehicles")

        return concerns

    @staticmethod
    def _shopping_district_concerns(detected_objects: List[Dict],
                                    detected_class_ids: List[Any]) -> List[str]:
        """Return concerns for a shopping district (crowding, belongings, store entrances)."""
        concerns: List[str] = []

        # Check for crowded shopping areas.
        if detected_class_ids.count(0) > 5:  # Person
            concerns.append("Crowded shopping area with multiple people")

        # Check for shopping bags and personal items.
        if 24 in detected_class_ids or 26 in detected_class_ids:  # Backpack or handbag
            concerns.append("Mind personal belongings in busy retail environment")

        # Store entrances/exits cannot be detected directly; the presence of
        # people is used as the trigger.
        if 0 in detected_class_ids:
            concerns.append("Be aware of store entrances and exits with potential automatic doors")

        return concerns