Spaces:
Running
on
Zero
Running
on
Zero
File size: 18,301 Bytes
3172319 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 |
import os
import json
from typing import Dict, List, Tuple, Any, Optional
from scene_type import SCENE_TYPES
from scene_detail_templates import SCENE_DETAIL_TEMPLATES
from object_template_fillers import OBJECT_TEMPLATE_FILLERS
from activity_templates import ACTIVITY_TEMPLATES
from safety_templates import SAFETY_TEMPLATES
from confifence_templates import CONFIDENCE_TEMPLATES
class SceneDescriptor:
    """
    Generates natural language descriptions of scenes.

    Handles scene descriptions, activity inference, and safety concerns
    identification. Detected objects are passed around as dictionaries; the
    methods of this class read the keys "class_id", "class_name", "region",
    "normalized_area" and "normalized_center" from them.
    """

    def __init__(self, scene_types=None, object_categories=None):
        """
        Initialize the scene descriptor.

        Args:
            scene_types: Dictionary of scene type definitions. An empty
                dictionary is used when omitted.
            object_categories: Optional mapping of category name to a list of
                class ids. When omitted, categories come from an optional
                JSON loader or from built-in defaults.
        """
        self.scene_types = scene_types or {}
        self.SCENE_TYPES = scene_types or {}
        self.OBJECT_CATEGORIES = self._resolve_object_categories(object_categories)

        # Load all template data.
        self._load_templates()

    def _resolve_object_categories(self, object_categories=None):
        """
        Pick the object-category mapping to use.

        Args:
            object_categories: Caller-supplied mapping, or None/empty.

        Returns:
            The supplied mapping if truthy; otherwise whatever a
            `_load_json_data("object_categories")` hook returns if such a hook
            exists and yields a truthy value; otherwise the built-in defaults.
        """
        if object_categories:
            return object_categories

        # Load from JSON or use defaults. No `_load_json_data` method is
        # defined in this class as visible here, so it is looked up
        # defensively instead of raising AttributeError during construction.
        loader = getattr(self, "_load_json_data", None)
        loaded = loader("object_categories") if callable(loader) else None
        return loaded or {
            "furniture": [56, 57, 58, 59, 60, 61],
            "electronics": [62, 63, 64, 65, 66, 67, 68, 69, 70],
            "kitchen_items": [39, 40, 41, 42, 43, 44, 45],
            "food": [46, 47, 48, 49, 50, 51, 52, 53, 54, 55],
            "vehicles": [1, 2, 3, 4, 5, 6, 7, 8],
            "personal_items": [24, 25, 26, 27, 28, 73, 78, 79],
        }

    def _load_templates(self):
        """Bind the template tables imported at module level to this instance."""
        self.confidence_templates = CONFIDENCE_TEMPLATES
        self.scene_detail_templates = SCENE_DETAIL_TEMPLATES
        self.object_template_fillers = OBJECT_TEMPLATE_FILLERS
        self.safety_templates = SAFETY_TEMPLATES
        self.activity_templates = ACTIVITY_TEMPLATES

    def _initialize_fallback_templates(self):
        """
        Initialize fallback templates when no external data is available.

        Only meant for the case where templates cannot be loaded from a file
        or an import. NOTE(review): nothing in the visible part of this class
        calls this method.
        """
        self.confidence_templates = {
            "high": "{description} {details}",
            "medium": "This appears to be {description} {details}",
            "low": "This might be {description}, but the confidence is low. {details}",
        }
        # Provide only the most basic templates as a fallback.
        self.scene_detail_templates = {
            "default": ["A space with various objects."],
        }
        self.object_template_fillers = {
            "default": ["various items"],
        }
        self.safety_templates = {
            "general": "Pay attention to {safety_element}.",
        }
        self.activity_templates = {
            "default": ["General activity"],
        }

    def _get_alternative_scenes(self, scene_scores: Dict[str, float],
                                threshold: float, top_k: int = 2) -> List[Dict]:
        """
        Get alternative scene interpretations with their scores.

        Args:
            scene_scores: Dictionary of scene type scores
            threshold: Minimum confidence threshold
            top_k: Number of alternatives to return

        Returns:
            List of dictionaries ("type", "name", "confidence") for the
            runner-up scenes whose score reaches the threshold, best first.
        """
        ranked = sorted(scene_scores.items(), key=lambda item: item[1], reverse=True)
        # Skip the best match (index 0) and consider only the next top_k.
        return [
            {
                "type": scene_type,
                "name": self.SCENE_TYPES.get(scene_type, {}).get("name", "Unknown"),
                "confidence": score,
            }
            for scene_type, score in ranked[1:1 + top_k]
            if score >= threshold
        ]

    def _infer_possible_activities(self, scene_type: str, detected_objects: List[Dict]) -> List[str]:
        """
        Infer possible activities based on scene type and detected objects.

        Args:
            scene_type: Identified scene type
            detected_objects: List of detected objects (each needs "class_id")

        Returns:
            List of possible activities without duplicates, in the order in
            which they were inferred (templates first, then object-based).
        """
        templates = self.activity_templates
        detected_class_ids = [obj["class_id"] for obj in detected_objects]
        activities = []

        if scene_type.startswith("aerial_view_"):
            if scene_type in ("aerial_view_intersection",
                              "aerial_view_commercial_area",
                              "aerial_view_plaza"):
                # Use the predefined activities for this aerial scene.
                activities.extend(templates.get(scene_type, []))
                if scene_type == "aerial_view_intersection":
                    # Add activities specific to pedestrians and vehicles.
                    has_pedestrians = 0 in detected_class_ids
                    has_vehicles = any(
                        class_id in (2, 5, 7)  # Car, bus, truck
                        for class_id in detected_class_ids
                    )
                    if has_pedestrians and has_vehicles:
                        activities.append("Waiting for an opportunity to cross the street")
                        activities.append("Obeying traffic signals")
            else:
                # Handle other aerial-view scenes that are not explicitly defined.
                activities.extend([
                    "Street crossing",
                    "Waiting for signals",
                    "Following traffic rules",
                    "Pedestrian movement",
                ])

        if scene_type in templates:
            activities.extend(templates[scene_type])
        elif "default" in templates:
            activities.extend(templates["default"])

        # Add activities based on specific object combinations
        if 62 in detected_class_ids and 57 in detected_class_ids:  # TV and sofa
            activities.append("Watching shows or movies")
        if 63 in detected_class_ids:  # laptop
            activities.append("Using a computer/laptop")
        if 67 in detected_class_ids:  # cell phone
            activities.append("Using a mobile phone")
        if 73 in detected_class_ids:  # book
            activities.append("Reading")
        if any(food_id in detected_class_ids for food_id in range(46, 56)):
            activities.append("Eating or preparing food")

        # Person-specific activities
        if 0 in detected_class_ids:  # Person
            if any(vehicle in detected_class_ids for vehicle in (1, 2, 3, 5, 7)):  # Vehicles
                activities.append("Commuting or traveling")
            if 16 in detected_class_ids:  # Dog
                activities.append("Walking a dog")
            if 24 in detected_class_ids or 26 in detected_class_ids:  # Backpack or handbag
                activities.append("Carrying personal items")

        # Remove duplicates while keeping first-seen order, so the result is
        # deterministic (a plain set() would yield an arbitrary order).
        return list(dict.fromkeys(activities))

    def _identify_safety_concerns(self, detected_objects: List[Dict], scene_type: str) -> List[str]:
        """
        Identify potential safety concerns based on objects and scene type.

        Args:
            detected_objects: List of detected objects
            scene_type: Identified scene type

        Returns:
            List of potential safety concerns without duplicates: the general
            concerns first, then the ones specific to the scene type.
        """
        detected_class_ids = [obj["class_id"] for obj in detected_objects]
        concerns = self._general_safety_concerns(
            detected_objects, detected_class_ids, scene_type
        )

        # At most one scene-specific rule set applies to a given scene type.
        scene_checks = {
            "upscale_dining": self._upscale_dining_concerns,
            "asian_commercial_street": self._asian_commercial_street_concerns,
            "financial_district": self._financial_district_concerns,
            "urban_intersection": self._urban_intersection_concerns,
            "transit_hub": self._transit_hub_concerns,
            "shopping_district": self._shopping_district_concerns,
        }
        scene_check = scene_checks.get(scene_type)
        if scene_check is not None:
            concerns.extend(scene_check(detected_objects, detected_class_ids))

        # Several objects can trigger the same message (e.g. two elevated
        # cups); report each concern once, in first-seen order.
        return list(dict.fromkeys(concerns))

    @staticmethod
    def _general_safety_concerns(detected_objects, detected_class_ids, scene_type):
        """Return the concerns that are checked regardless of the new scene-specific rules."""
        concerns = []

        # General safety concerns
        if 42 in detected_class_ids or 43 in detected_class_ids:  # Fork or knife
            concerns.append("Sharp utensils present")
        if 76 in detected_class_ids:  # Scissors
            concerns.append("Cutting tools present")

        # Traffic-related concerns
        if scene_type in ("city_street", "parking_lot"):
            if 0 in detected_class_ids and any(  # Person near vehicles
                vehicle in detected_class_ids for vehicle in (2, 3, 5, 7, 8)
            ):
                concerns.append("Pedestrians near vehicles")
            if 9 in detected_class_ids:  # Traffic light
                concerns.append("Monitor traffic signals")

        # Identify crowded scenes
        person_count = detected_class_ids.count(0)
        if person_count > 5:
            concerns.append(f"Crowded area with multiple people ({person_count})")

        # Scene-specific concerns
        if scene_type == "kitchen":
            if 68 in detected_class_ids or 69 in detected_class_ids:  # Microwave or oven
                concerns.append("Hot cooking equipment")

        # Potentially unstable objects: sizeable containers in the top regions.
        for obj in detected_objects:
            if obj["class_id"] not in (39, 40, 41, 45):  # Bottle, wine glass, cup, bowl
                continue
            if (obj["region"] in ("top_left", "top_center", "top_right")
                    and obj["normalized_area"] > 0.05):
                concerns.append(f"Elevated {obj['class_name']} might be unstable")

        return concerns

    @staticmethod
    def _upscale_dining_concerns(detected_objects, detected_class_ids):
        """Return concerns for an upscale dining scene (fragile items, crowded table)."""
        concerns = []

        # Check for fragile items
        if 40 in detected_class_ids:  # Wine glass
            concerns.append("Fragile glassware present")

        # Candles cannot be detected directly; a cup (which might include a
        # candle holder) in a formal dining scene is taken as a hint.
        if 41 in detected_class_ids:
            concerns.append("Possible lit candles or decorative items requiring care")

        # Check for overcrowded table: count tableware/food objects that share
        # the region of the first detected dining table.
        tables = [obj for obj in detected_objects if obj["class_id"] == 60]  # Dining table
        if tables:
            table_region = tables[0]["region"]
            items_on_table = sum(
                1
                for obj in detected_objects
                if 39 <= obj["class_id"] <= 55 and obj["region"] == table_region
            )
            if items_on_table > 8:
                concerns.append("Dining table has multiple items which should be handled with care")

        return concerns

    @staticmethod
    def _asian_commercial_street_concerns(detected_objects, detected_class_ids):
        """Return concerns for an Asian commercial street (crowding, two-wheelers, footing)."""
        concerns = []
        people = [obj for obj in detected_objects if obj["class_id"] == 0]

        # Check for crowded walkways via the mean pairwise distance between
        # people's normalized centers (a simplified density measure).
        if len(people) > 3:
            positions = [obj["normalized_center"] for obj in people]
            total_distance = 0
            pair_count = 0
            for index, first in enumerate(positions):
                for second in positions[index + 1:]:
                    total_distance += (
                        (second[0] - first[0]) ** 2 + (second[1] - first[1]) ** 2
                    ) ** 0.5
                    pair_count += 1
            if pair_count > 0 and total_distance / pair_count < 0.1:  # Close proximity
                concerns.append("Crowded walkway with limited personal space")

        # Check for motorcycles/bicycles near pedestrians
        if people and (1 in detected_class_ids or 3 in detected_class_ids):
            concerns.append("Two-wheeled vehicles in pedestrian areas")

        # Trip hazards cannot be detected directly; people located in a
        # "bottom" region are taken as a hint of possibly uneven surfaces.
        if any("bottom" in obj["region"] for obj in people):
            concerns.append("Potential uneven walking surfaces in commercial area")

        return concerns

    @staticmethod
    def _financial_district_concerns(detected_objects, detected_class_ids):
        """Return concerns for a financial district (traffic volume, signals, visibility)."""
        concerns = []

        # Check for heavy traffic conditions
        vehicle_count = sum(
            1 for class_id in detected_class_ids if class_id in (2, 5, 7)  # Car, bus, truck
        )
        if vehicle_count > 5:
            concerns.append("Heavy vehicle traffic in urban area")

        # Check for pedestrians crossing busy streets
        if 0 in detected_class_ids and any(
            vehicle in detected_class_ids for vehicle in (2, 3, 5, 7)
        ):
            concerns.append("Pedestrians navigating busy urban traffic")

        # Check for traffic signals
        if 9 in detected_class_ids:  # Traffic light
            concerns.append("Observe traffic signals when navigating this area")
        elif vehicle_count > 3:
            # No traffic lights detected although the area is busy.
            concerns.append("Busy traffic area potentially without visible traffic signals in view")

        # There is no direct time-of-day data; the presence of a
        # "lighting_conditions" key on any object is used as the hint.
        if vehicle_count > 0 and any("lighting_conditions" in obj for obj in detected_objects):
            concerns.append("Reduced visibility conditions during evening commute")

        return concerns

    @staticmethod
    def _urban_intersection_concerns(detected_objects, detected_class_ids):
        """Return concerns for an urban intersection (crossing pedestrians, signals, traffic)."""
        concerns = []
        pedestrians = [obj for obj in detected_objects if obj["class_id"] == 0]
        vehicles = [obj for obj in detected_objects if obj["class_id"] in (2, 3, 5, 7)]

        # Simplified crossing check: at least three pedestrians spread over
        # at least two different regions.
        if len(pedestrians) >= 3:
            pedestrian_regions = {obj["region"] for obj in pedestrians}
            if len(pedestrian_regions) >= 2:
                concerns.append("Multiple pedestrians crossing the intersection")

        # Check for traffic signal observation
        if 9 in detected_class_ids:  # Traffic light
            concerns.append("Observe traffic signals when crossing")

        # Check for busy intersection
        if len(vehicles) > 3:
            concerns.append("Busy intersection with multiple vehicles")

        # People but no traffic lights: potential jay-walking
        if pedestrians and 9 not in detected_class_ids:
            concerns.append("Pedestrians should use designated crosswalks")

        if len(pedestrians) > 5:
            concerns.append("High pedestrian density at crossing points")

        return concerns

    @staticmethod
    def _transit_hub_concerns(detected_objects, detected_class_ids):
        """Return concerns for a transit hub (crowding, luggage, transit vehicles)."""
        concerns = []

        if detected_class_ids.count(0) > 8:  # Person
            concerns.append("Crowded transit area requiring careful navigation")

        # Check for luggage/bags that could be trip hazards
        if 24 in detected_class_ids or 28 in detected_class_ids:  # Backpack or suitcase
            concerns.append("Luggage and personal items may create obstacles")

        # Public transportation vehicles
        if any(vehicle in detected_class_ids for vehicle in (5, 6, 7)):  # Bus, train, truck
            concerns.append("Stay clear of arriving and departing transit vehicles")

        return concerns

    @staticmethod
    def _shopping_district_concerns(detected_objects, detected_class_ids):
        """Return concerns for a shopping district (crowding, belongings, store doors)."""
        concerns = []
        person_count = detected_class_ids.count(0)

        # Check for crowded shopping areas
        if person_count > 5:
            concerns.append("Crowded shopping area with multiple people")

        # Check for shopping bags and personal items
        if 24 in detected_class_ids or 26 in detected_class_ids:  # Backpack or handbag
            concerns.append("Mind personal belongings in busy retail environment")

        # Store entrances/exits cannot be detected directly; the presence of
        # people in a shopping district is used as the trigger.
        if person_count > 0:
            concerns.append("Be aware of store entrances and exits with potential automatic doors")

        return concerns
|