"""
Model management for Phramer AI
By Pariente AI, for MIA TV Series

BAGEL 7B integration with professional photography knowledge enhancement
"""

import spaces
import logging
import tempfile
import os
import re
from typing import Optional, Dict, Any, Tuple
from PIL import Image
from gradio_client import Client, handle_file

from config import get_device_config, PROFESSIONAL_PHOTOGRAPHY_CONFIG
from utils import clean_memory, safe_execute
from professional_photography import (
    ProfessionalPhotoAnalyzer,
    enhance_flux_prompt_with_professional_knowledge,
    professional_analyzer
)

logger = logging.getLogger(__name__)


class BaseImageAnalyzer:
    """Interface shared by image analysis backends.

    Subclasses implement ``initialize`` and ``analyze_image``; ``cleanup``
    has a default implementation.
    """

    def __init__(self):
        # Flipped to True by a successful initialize() in subclasses.
        self.is_initialized = False
        self.device_config = get_device_config()

    def initialize(self) -> bool:
        """Prepare the backend; subclasses must override."""
        raise NotImplementedError

    def analyze_image(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
        """Return ``(description, metadata)`` for *image*; subclasses must override."""
        raise NotImplementedError

    def cleanup(self) -> None:
        """Release resources by delegating to ``clean_memory``."""
        clean_memory()


class BagelAPIAnalyzer(BaseImageAnalyzer):
    """Analyzer that talks to the BAGEL 7B Space through ``gradio_client``."""

    def __init__(self):
        super().__init__()
        self.client = None
        self.space_url = "Malaji71/Bagel-7B-Demo"
        self.api_endpoint = "/image_understanding"
        # Optional token, read once from the environment.
        self.hf_token = os.getenv("HF_TOKEN")
        self.professional_analyzer = professional_analyzer

    def initialize(self) -> bool:
        """Create the API client, retrying once without the token.

        Returns:
            True when a client is available (including when it was already
            initialized), False when every attempt failed.
        """
        if self.is_initialized:
            return True

        try:
            logger.info("Initializing BAGEL API client for Phramer AI...")

            # Initialize client with token if available
            if self.hf_token:
                logger.info("Using HF token for enhanced API access")
                self.client = Client(self.space_url, hf_token=self.hf_token)
            else:
                logger.info("Using public API access")
                self.client = Client(self.space_url)

            self.is_initialized = True
            logger.info("BAGEL API client initialized successfully")
            return True

        except Exception as e:
            logger.error(f"BAGEL API client initialization failed: {e}")

            # Without a token the first attempt already was the public one,
            # so there is nothing left to retry.
            if not self.hf_token:
                return False

            logger.info("Retrying without token...")
            try:
                self.client = Client(self.space_url)
                self.is_initialized = True
                logger.info("BAGEL API client initialized (fallback mode)")
                return True
            except Exception as e2:
                logger.error(f"Fallback initialization failed: {e2}")
                return False

    def _create_professional_enhanced_prompt(self, analysis_type: str = "multimodal") -> str:
        """Return the analysis prompt for *analysis_type*.

        The knowledge base is imported lazily; when that import fails the
        knowledge-free fallback prompt is returned instead.

        Args:
            analysis_type: ``"cinematic"``, ``"flux_optimized"``, or anything
                else for the multimodal prompt.
        """
        try:
            from professional_photography import EXPERT_PHOTOGRAPHY_KNOWLEDGE
        except ImportError:
            logger.warning("Professional photography knowledge not available")
            return self._create_fallback_prompt(analysis_type)

        section_names = (
            'scene_types',
            'lighting_principles',
            'composition_rules',
            'camera_angles',
            'photographic_planes',
            'focus_techniques',
            'camera_modes',
            'iso_guidelines',
            'lighting_situations',
            'movement_techniques',
            'specialized_techniques',
        )
        # A missing section becomes an empty dict so the builders can rely
        # on their own defaults.
        knowledge_sections = {
            name: EXPERT_PHOTOGRAPHY_KNOWLEDGE.get(name, {})
            for name in section_names
        }

        if analysis_type == "cinematic":
            return self._build_cinematic_prompt(knowledge_sections)
        if analysis_type == "flux_optimized":
            return self._build_flux_prompt(knowledge_sections)
        return self._build_multimodal_prompt(knowledge_sections)
def _build_cinematic_prompt(self, knowledge: Dict[str, Any]) -> str: """Build cinematic analysis prompt with complete professional knowledge""" camera_angles = knowledge.get('camera_angles', {}) photographic_planes = knowledge.get('photographic_planes', {}) lighting_principles = knowledge.get('lighting_principles', {}) composition_rules = knowledge.get('composition_rules', {}) scene_types = knowledge.get('scene_types', {}) iso_guidelines = knowledge.get('iso_guidelines', {}) focus_techniques = knowledge.get('focus_techniques', {}) camera_modes = knowledge.get('camera_modes', {}) prompt = f"""Analyze this image as a master cinematographer with 30+ years of cinema experience. Apply complete professional photography knowledge. Provide exactly two sections: 1. DESCRIPTION: Create a concise, technical analysis for cinematic reproduction using these professional frameworks: CAMERA ANGLES - Identify and apply: • Eye Level Normal: {camera_angles.get("eye_level_normal", {}).get("description", "Camera at subject's eye level")} - {camera_angles.get("eye_level_normal", {}).get("effect", "neutral perspective")}, best for: {camera_angles.get("eye_level_normal", {}).get("best_for", "portraits, documentary")} • Low Angle: {camera_angles.get("low_angle_worms_eye", {}).get("description", "Camera below subject looking up")} - {camera_angles.get("low_angle_worms_eye", {}).get("effect", "subject appears larger")}, psychological impact: {camera_angles.get("low_angle_worms_eye", {}).get("psychological_impact", "dominance, strength")} • High Angle: {camera_angles.get("high_angle_birds_eye", {}).get("description", "Camera above subject looking down")} - {camera_angles.get("high_angle_birds_eye", {}).get("effect", "subject appears smaller")}, best for: {camera_angles.get("high_angle_birds_eye", {}).get("best_for", "environmental context")} • Dutch Angle: {camera_angles.get("dutch_angle", {}).get("description", "Camera tilted off horizontal")} - {camera_angles.get("dutch_angle", 
{}).get("effect", "dynamic tension")}, creates: {camera_angles.get("dutch_angle", {}).get("psychological_impact", "instability, energy")} PHOTOGRAPHIC PLANES - Apply appropriate framing: • Extreme Wide: {photographic_planes.get("extreme_wide_shot", {}).get("framing", "Subject very small in environment")} - {photographic_planes.get("extreme_wide_shot", {}).get("purpose", "establish location and context")} • Wide Shot: {photographic_planes.get("wide_shot", {}).get("framing", "Full body visible with environment")} - {photographic_planes.get("wide_shot", {}).get("purpose", "show subject in context")} • Medium Shot: {photographic_planes.get("medium_shot", {}).get("framing", "From waist up approximately")} - {photographic_planes.get("medium_shot", {}).get("purpose", "balance between subject and environment")} • Close-up: {photographic_planes.get("close_up", {}).get("framing", "Head and shoulders, tight on face")} - {photographic_planes.get("close_up", {}).get("purpose", "show emotion and expression clearly")} • Extreme Close-up: {photographic_planes.get("extreme_close_up", {}).get("framing", "Part of face or specific detail")} - {photographic_planes.get("extreme_close_up", {}).get("purpose", "intense emotion or specific detail")} COMPOSITION RULES - Apply these techniques: • Rule of Thirds: {composition_rules.get("rule_of_thirds", {}).get("principle", "Divide frame into 9 equal sections")} - {composition_rules.get("rule_of_thirds", {}).get("application", "place key elements on intersection points")} • Leading Lines: {composition_rules.get("leading_lines", {}).get("purpose", "Guide viewer's eye through the image")} - technique: {composition_rules.get("leading_lines", {}).get("technique", "use lines to lead to main subject")} • Depth Layers: {composition_rules.get("depth_layers", {}).get("foreground", "Nearest elements")}, {composition_rules.get("depth_layers", {}).get("middle_ground", "Main subject area")}, {composition_rules.get("depth_layers", {}).get("background", 
"Context and environment")} LIGHTING ANALYSIS - Identify lighting type and quality: Natural Light Types: • Golden Hour: {lighting_principles.get("natural_light_types", {}).get("golden_hour", {}).get("timing", "First hour after sunrise, last hour before sunset")} - {lighting_principles.get("natural_light_types", {}).get("golden_hour", {}).get("characteristics", "warm, soft, directional")} • Blue Hour: {lighting_principles.get("natural_light_types", {}).get("blue_hour", {}).get("timing", "20-30 minutes after sunset")} - {lighting_principles.get("natural_light_types", {}).get("blue_hour", {}).get("characteristics", "even blue light, dramatic mood")} • Overcast: {lighting_principles.get("natural_light_types", {}).get("overcast", {}).get("characteristics", "soft, even, diffused light")} - advantage: {lighting_principles.get("natural_light_types", {}).get("overcast", {}).get("advantage", "no harsh shadows")} 2. CAMERA_SETUP: Recommend specific professional equipment based on scene analysis using these configurations: SCENE TYPES - Match scene to appropriate setup: Portrait Studio: Equipment: {scene_types.get("portrait_studio", {}).get("equipment", {}).get("camera", "Canon EOS R5")}, {scene_types.get("portrait_studio", {}).get("equipment", {}).get("lens", "85mm f/1.4")}, Settings: {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("mode", "AV/A")}, {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("aperture", "f/2.8")}, {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("iso", "100-400")} Street Photography: Equipment: {scene_types.get("street_photography", {}).get("equipment", {}).get("camera", "Leica M11")}, {scene_types.get("street_photography", {}).get("equipment", {}).get("lens", "35mm f/1.4")}, Settings: {scene_types.get("street_photography", {}).get("camera_settings", {}).get("mode", "TV/S or Program")}, {scene_types.get("street_photography", {}).get("camera_settings", {}).get("aperture", "f/5.6-f/8")}, 
{scene_types.get("street_photography", {}).get("camera_settings", {}).get("iso", "400-1600")} Landscape: Equipment: {scene_types.get("landscape", {}).get("equipment", {}).get("camera", "Phase One XT")}, {scene_types.get("landscape", {}).get("equipment", {}).get("lens", "24-70mm f/4")}, Settings: {scene_types.get("landscape", {}).get("camera_settings", {}).get("mode", "AV/A or Manual")}, {scene_types.get("landscape", {}).get("camera_settings", {}).get("aperture", "f/8-f/11")}, {scene_types.get("landscape", {}).get("camera_settings", {}).get("iso", "100-400")} Architecture: Equipment: {scene_types.get("architecture", {}).get("equipment", {}).get("camera", "Canon EOS R5")}, {scene_types.get("architecture", {}).get("equipment", {}).get("lens", "24-70mm f/2.8")}, Settings: {scene_types.get("architecture", {}).get("camera_settings", {}).get("mode", "AV/A")}, {scene_types.get("architecture", {}).get("camera_settings", {}).get("aperture", "f/8-f/11")}, {scene_types.get("architecture", {}).get("camera_settings", {}).get("iso", "100-400")} Action Sports: Equipment: {scene_types.get("action_sports", {}).get("equipment", {}).get("camera", "Sony A1")}, {scene_types.get("action_sports", {}).get("equipment", {}).get("lens", "70-200mm f/2.8")}, Settings: {scene_types.get("action_sports", {}).get("camera_settings", {}).get("mode", "TV/S")}, {scene_types.get("action_sports", {}).get("camera_settings", {}).get("aperture", "f/2.8-f/4")}, {scene_types.get("action_sports", {}).get("camera_settings", {}).get("iso", "800-3200")} Apply complete professional cinematography knowledge to generate concise, technically accurate prompt for cinema-quality generation.""" return prompt def _build_flux_prompt(self, knowledge: Dict[str, Any]) -> str: """Build FLUX-optimized prompt with complete professional knowledge""" camera_angles = knowledge.get('camera_angles', {}) lighting_situations = knowledge.get('lighting_situations', {}) composition_rules = knowledge.get('composition_rules', {}) 
scene_types = knowledge.get('scene_types', {}) camera_modes = knowledge.get('camera_modes', {}) prompt = f"""Analyze this image for FLUX prompt generation using complete professional photography expertise. Apply the full knowledge base for photorealistic output. Provide exactly two sections: 1. DESCRIPTION: Professional technical analysis using complete photography framework: CAMERA ANGLES - Identify specific angle and apply professional knowledge: • Eye Level: {camera_angles.get("eye_level_normal", {}).get("description", "Eye level normal")} - {camera_angles.get("eye_level_normal", {}).get("effect", "neutral perspective")}, best for: {camera_angles.get("eye_level_normal", {}).get("best_for", "portraits, documentary")} • Low Angle: {camera_angles.get("low_angle_worms_eye", {}).get("description", "Low angle worms eye")} - {camera_angles.get("low_angle_worms_eye", {}).get("effect", "subject appears larger")}, best for: {camera_angles.get("low_angle_worms_eye", {}).get("best_for", "architecture, powerful portraits")} • High Angle: {camera_angles.get("high_angle_birds_eye", {}).get("description", "High angle birds eye")} - {camera_angles.get("high_angle_birds_eye", {}).get("effect", "subject appears smaller")}, best for: {camera_angles.get("high_angle_birds_eye", {}).get("best_for", "environmental context, patterns")} LIGHTING SITUATIONS - Match to appropriate lighting condition: • Bright Daylight: ISO {lighting_situations.get("bright_daylight", {}).get("iso", "100-200")} - challenge: {lighting_situations.get("bright_daylight", {}).get("challenge", "harsh shadows")}, solutions: use reflectors, find open shade • Overcast Day: ISO {lighting_situations.get("overcast_day", {}).get("iso", "200-400")} - {lighting_situations.get("overcast_day", {}).get("characteristics", "soft, even light but dimmer")}, advantage: {lighting_situations.get("overcast_day", {}).get("advantage", "natural diffusion")} • Indoor Natural: ISO {lighting_situations.get("indoor_natural_light", 
{}).get("iso", "800-1600")} - {lighting_situations.get("indoor_natural_light", {}).get("window_light", "excellent for portraits")}, technique: {lighting_situations.get("indoor_natural_light", {}).get("technique", "position subject relative to window")} • Low Light: ISO {lighting_situations.get("low_light_available", {}).get("iso", "1600-6400")} - {lighting_situations.get("low_light_available", {}).get("stabilization", "essential for sharp images")}, technique: {lighting_situations.get("low_light_available", {}).get("technique", "wider apertures, slower movements")} COMPOSITION APPLICATION - Apply these specific rules: • Rule of Thirds: {composition_rules.get("rule_of_thirds", {}).get("principle", "Divide frame into 9 equal sections")} - {composition_rules.get("rule_of_thirds", {}).get("subject_placement", "eyes on upper third line for portraits")}, {composition_rules.get("rule_of_thirds", {}).get("horizon_placement", "upper or lower third for landscapes")} • Leading Lines: {composition_rules.get("leading_lines", {}).get("purpose", "Guide viewer's eye through the image")} - technique: {composition_rules.get("leading_lines", {}).get("technique", "use lines to lead to main subject")} • Depth Layers: {composition_rules.get("depth_layers", {}).get("technique", "Create separation between layers")} - {composition_rules.get("depth_layers", {}).get("foreground", "Nearest elements")}, {composition_rules.get("depth_layers", {}).get("middle_ground", "Main subject area")}, {composition_rules.get("depth_layers", {}).get("background", "Context and environment")} 2. 
CAMERA_SETUP: Apply complete professional equipment knowledge: SCENE TYPE MATCHING - Select appropriate configuration: Portrait Studio: Equipment: {scene_types.get("portrait_studio", {}).get("equipment", {}).get("camera", "Canon EOS R5")}, {scene_types.get("portrait_studio", {}).get("equipment", {}).get("lens", "85mm f/1.4")}, Camera settings: {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("mode", "AV/A")}, {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("aperture", "f/2.8")}, {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("iso", "100-400")}, Focus: {scene_types.get("portrait_studio", {}).get("camera_settings", {}).get("focus", "single point AF on eyes")} Portrait Exterior: Equipment: {scene_types.get("portrait_exterior", {}).get("equipment", {}).get("camera", "Canon EOS R6")}, {scene_types.get("portrait_exterior", {}).get("equipment", {}).get("lens", "85mm f/1.4")}, Camera settings: {scene_types.get("portrait_exterior", {}).get("camera_settings", {}).get("mode", "AV/A")}, {scene_types.get("portrait_exterior", {}).get("camera_settings", {}).get("aperture", "f/2.8-f/4")}, {scene_types.get("portrait_exterior", {}).get("camera_settings", {}).get("iso", "100-800")}, {scene_types.get("portrait_exterior", {}).get("camera_settings", {}).get("exposure_compensation", "+0.3 to +0.7 for faces")} Street Photography: Equipment: {scene_types.get("street_photography", {}).get("equipment", {}).get("camera", "Leica M11")}, {scene_types.get("street_photography", {}).get("equipment", {}).get("lens", "35mm f/1.4")}, Camera settings: {scene_types.get("street_photography", {}).get("camera_settings", {}).get("mode", "TV/S or Program")}, {scene_types.get("street_photography", {}).get("camera_settings", {}).get("shutter_speed", "1/125s minimum")}, {scene_types.get("street_photography", {}).get("camera_settings", {}).get("aperture", "f/5.6-f/8")}, {scene_types.get("street_photography", {}).get("camera_settings", 
{}).get("iso", "400-1600")} Landscape: Equipment: {scene_types.get("landscape", {}).get("equipment", {}).get("camera", "Phase One XT")}, {scene_types.get("landscape", {}).get("equipment", {}).get("lens", "24-70mm f/4")}, Camera settings: {scene_types.get("landscape", {}).get("camera_settings", {}).get("mode", "AV/A or Manual")}, {scene_types.get("landscape", {}).get("camera_settings", {}).get("aperture", "f/8-f/11")}, {scene_types.get("landscape", {}).get("camera_settings", {}).get("iso", "100-400")}, {scene_types.get("landscape", {}).get("camera_settings", {}).get("focus", "hyperfocal distance or infinity")} Architecture: Equipment: {scene_types.get("architecture", {}).get("equipment", {}).get("camera", "Canon EOS R5")}, {scene_types.get("architecture", {}).get("equipment", {}).get("lens", "24-70mm f/2.8")}, Camera settings: {scene_types.get("architecture", {}).get("camera_settings", {}).get("mode", "AV/A")}, {scene_types.get("architecture", {}).get("camera_settings", {}).get("aperture", "f/8-f/11")}, {scene_types.get("architecture", {}).get("camera_settings", {}).get("iso", "100-400")}, {scene_types.get("architecture", {}).get("camera_settings", {}).get("perspective_correction", "use tilt-shift when available")} Action Sports: Equipment: {scene_types.get("action_sports", {}).get("equipment", {}).get("camera", "Sony A1")}, {scene_types.get("action_sports", {}).get("equipment", {}).get("lens", "70-200mm f/2.8")}, Camera settings: {scene_types.get("action_sports", {}).get("camera_settings", {}).get("mode", "TV/S")}, {scene_types.get("action_sports", {}).get("camera_settings", {}).get("shutter_speed", "1/500s+ to freeze motion")}, {scene_types.get("action_sports", {}).get("camera_settings", {}).get("aperture", "f/2.8-f/4")}, {scene_types.get("action_sports", {}).get("camera_settings", {}).get("iso", "800-3200")} CAMERA MODES - Apply appropriate control: • Aperture Priority: {camera_modes.get("aperture_priority", {}).get("mode_designation", "AV (Canon) / A (Nikon)")} 
- photographer sets {camera_modes.get("aperture_priority", {}).get("photographer_sets", "aperture value")}, camera sets {camera_modes.get("aperture_priority", {}).get("camera_sets", "shutter speed")}, best for: {camera_modes.get("aperture_priority", {}).get("best_for", "controlling depth of field")} • Shutter Priority: {camera_modes.get("shutter_priority", {}).get("mode_designation", "TV (Canon) / S (Nikon)")} - photographer sets {camera_modes.get("shutter_priority", {}).get("photographer_sets", "shutter speed")}, camera sets {camera_modes.get("shutter_priority", {}).get("camera_sets", "aperture")}, best for: {camera_modes.get("shutter_priority", {}).get("best_for", "controlling motion")} • Manual Mode: {camera_modes.get("manual_mode", {}).get("photographer_sets", "Both aperture and shutter speed")} - when to use: consistent lighting, studio work, advantage: {camera_modes.get("manual_mode", {}).get("advantage", "complete creative control")} Generate technically precise content optimized for FLUX's photorealistic capabilities using complete professional knowledge.""" return prompt def _build_multimodal_prompt(self, knowledge: Dict[str, Any]) -> str: """Build multimodal analysis prompt with complete professional knowledge""" prompt = """Analyze this image with professional cinematography expertise for multi-platform prompt generation. You are a master cinematographer with extensive technical and artistic knowledge from 30+ years in cinema. Provide exactly two sections: 1. DESCRIPTION: Expert visual analysis for prompt generation: - Comprehensive scene description with photographic insight - Subject matter, composition, and visual hierarchy - Lighting analysis: quality, direction, mood, technical setup - Color palette, contrast, and tonal relationships - Artistic elements: style, mood, atmosphere, visual impact - Technical photographic qualities and execution 2. 
CAMERA_SETUP: Professional equipment and technique recommendation: - Camera system recommendation based on scene requirements - Lens selection with specific focal length and aperture range - Technical shooting parameters and considerations - Lighting setup and methodology for scene recreation - Professional approach: shooting style and technical execution Apply master-level cinematography knowledge: advanced composition techniques, professional lighting principles, camera system expertise, lens characteristics, and technical excellence. Create content suitable for multiple generative engines (Flux, Midjourney, etc.) with emphasis on photorealistic quality.""" return prompt def _create_fallback_prompt(self, analysis_type: str) -> str: """Create fallback prompt when professional knowledge is not available""" if analysis_type == "cinematic": return """Analyze this image as a professional cinematographer. Provide exactly two sections: 1. DESCRIPTION: Create a detailed, flowing paragraph describing the image for cinematic reproduction: - Scene composition and visual storytelling elements - Lighting quality, direction, and dramatic mood - Color palette, tonal relationships, and atmospheric elements - Subject positioning, environmental context, and framing - Cinematic qualities: film grain, depth of field, visual style - Technical photographic elements that enhance realism 2. 
CAMERA_SETUP: Recommend professional cinema/photography equipment based on scene analysis: - Camera body: Choose from Canon EOS R5/R6, Sony A7R/A1, Leica M11, ARRI Alexa, RED cameras - Lens: Specific focal length and aperture (e.g., "85mm f/1.4", "35mm anamorphic f/2.8") - Technical settings: Aperture consideration for depth of field and story mood - Lighting setup: Professional lighting rationale (key, fill, rim, practical lights) - Shooting style: Documentary, portrait, landscape, architectural, or cinematic approach Apply professional cinematography principles: rule of thirds, leading lines, depth layering, lighting direction for mood, and technical excellence. Focus on creating prompts optimized for photorealistic, cinema-quality generation.""" elif analysis_type == "flux_optimized": return """Analyze this image for FLUX prompt generation with professional cinematography expertise. Provide exactly two sections: 1. DESCRIPTION: Create a detailed technical description optimized for FLUX generation: - Scene elements and composition with precise technical language - Lighting setup and quality with specific technical terms - Camera angle and perspective with professional terminology - Color grading and tonal balance for photorealistic output - Depth of field and focus characteristics - Professional photographic style and execution 2. CAMERA_SETUP: Recommend specific professional equipment for FLUX optimization: - Professional camera body with model specifications - Lens specifications with focal length and aperture - ISO settings and technical parameters - Professional lighting setup and rationale - Shooting technique and professional approach Focus on technical precision and professional terminology optimized for FLUX's photorealistic capabilities.""" else: # multimodal analysis return """Analyze this image with professional cinematography expertise for multi-platform prompt generation. Provide exactly two sections: 1. 
DESCRIPTION: Expert visual analysis for prompt generation: - Comprehensive scene description with photographic insight - Subject matter, composition, and visual hierarchy - Lighting analysis: quality, direction, mood, technical setup - Color palette, contrast, and tonal relationships - Artistic elements: style, mood, atmosphere, visual impact - Technical photographic qualities and execution 2. CAMERA_SETUP: Professional equipment and technique recommendation: - Camera system recommendation based on scene requirements - Lens selection with specific focal length and aperture range - Technical shooting parameters and considerations - Lighting setup and methodology for scene recreation - Professional approach: shooting style and technical execution Apply master-level cinematography knowledge: advanced composition techniques, professional lighting principles, camera system expertise, lens characteristics, and technical excellence. Create content suitable for multiple generative engines (Flux, Midjourney, etc.) with emphasis on photorealistic quality.""" def _extract_professional_camera_setup(self, description: str) -> Optional[str]: """Extract and enhance camera setup with professional photography knowledge""" try: camera_setup = None # Extract BAGEL's camera recommendation if "CAMERA_SETUP:" in description: parts = description.split("CAMERA_SETUP:") if len(parts) > 1: camera_section = parts[1].strip() camera_text = camera_section.split('\n')[0].strip() if len(camera_text) > 20: camera_setup = self._parse_professional_camera_recommendation(camera_text) elif "2. CAMERA_SETUP" in description: parts = description.split("2. 
CAMERA_SETUP") if len(parts) > 1: camera_section = parts[1].strip() camera_text = camera_section.split('\n')[0].strip() if len(camera_text) > 20: camera_setup = self._parse_professional_camera_recommendation(camera_text) # Fallback: look for camera recommendations in text if not camera_setup: camera_setup = self._find_professional_camera_recommendation(description) return camera_setup except Exception as e: logger.warning(f"Failed to extract professional camera setup: {e}") return None def _parse_professional_camera_recommendation(self, camera_text: str) -> Optional[str]: """Parse camera recommendation with professional photography enhancement""" try: # Clean and extract with professional patterns camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE) # Professional camera patterns (more comprehensive) camera_patterns = [ r'(Canon EOS R[^\s,]*(?:\s+[^\s,]*)?)', r'(Sony A[^\s,]*(?:\s+[^\s,]*)?)', r'(Leica [^\s,]+)', r'(Hasselblad [^\s,]+)', r'(Phase One [^\s,]+)', r'(Fujifilm [^\s,]+)', r'(ARRI [^\s,]+)', r'(RED [^\s,]+)', r'(Nikon [^\s,]+)' ] camera_model = None for pattern in camera_patterns: match = re.search(pattern, camera_text, re.IGNORECASE) if match: camera_model = match.group(1).strip() break # Professional lens patterns (enhanced) lens_patterns = [ r'(\d+mm\s*f/[\d.]+(?:\s*(?:lens|anamorphic|telephoto|wide))?)', r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)', r'(with\s+(?:a\s+)?(\d+mm[^,.]*))', r'(paired with.*?(\d+mm[^,.]*))', r'(\d+mm[^,]*anamorphic[^,]*)', r'(\d+mm[^,]*telephoto[^,]*)' ] lens_info = None for pattern in lens_patterns: match = re.search(pattern, camera_text, re.IGNORECASE) if match: lens_info = match.group(1).strip() lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE) break # Build professional recommendation parts = [] if camera_model: parts.append(camera_model) if lens_info: parts.append(lens_info) if parts: result = ', 
'.join(parts) logger.info(f"Professional camera setup extracted: {result}") return result return None except Exception as e: logger.warning(f"Failed to parse professional camera recommendation: {e}") return None def _find_professional_camera_recommendation(self, text: str) -> Optional[str]: """Find professional camera recommendations with enhanced detection""" try: sentences = re.split(r'[.!?]', text) for sentence in sentences: # Professional camera brands and technical terms if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm', 'arri', 'red']): if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens', 'shot on']): parsed = self._parse_professional_camera_recommendation(sentence.strip()) if parsed: return parsed return None except Exception as e: logger.warning(f"Failed to find professional camera recommendation: {e}") return None def _enhance_description_with_professional_context(self, description: str, image: Image.Image) -> str: """Enhance BAGEL description with professional cinematography context""" try: if not PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("enable_expert_analysis", True): return description # Get professional cinematography context without being invasive enhanced_context = self.professional_analyzer.generate_enhanced_context(description) # Extract key professional insights scene_type = enhanced_context.get("scene_type", "general") technical_context = enhanced_context.get("technical_context", "") professional_insight = enhanced_context.get("professional_insight", "") # Enhance description subtly with professional terminology enhanced_description = description # Add professional context if not already present if technical_context and len(technical_context) > 20: # Only add if it doesn't duplicate existing information if not any(term in description.lower() for term in ["shot on", "professional", "camera"]): enhanced_description += f"\n\nProfessional Context: 
{technical_context}" logger.info(f"Enhanced description with cinematography context for {scene_type} scene") return enhanced_description except Exception as e: logger.warning(f"Cinematography context enhancement failed: {e}") return description def _save_temp_image(self, image: Image.Image) -> str: """Save image to temporary file for API call""" try: temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png') temp_path = temp_file.name temp_file.close() if image.mode != 'RGB': image = image.convert('RGB') image.save(temp_path, 'PNG') return temp_path except Exception as e: logger.error(f"Failed to save temporary image: {e}") return None def _cleanup_temp_file(self, file_path: str): """Clean up temporary file""" try: if file_path and os.path.exists(file_path): os.unlink(file_path) except Exception as e: logger.warning(f"Failed to cleanup temp file: {e}") @spaces.GPU(duration=60) def analyze_image(self, image: Image.Image, prompt: str = None) -> Tuple[str, Dict[str, Any]]: """Analyze image using BAGEL API with professional cinematography enhancement""" if not self.is_initialized: success = self.initialize() if not success: return "BAGEL API not available", {"error": "API initialization failed"} temp_path = None metadata = { "model": "BAGEL-7B-Professional", "device": "api", "confidence": 0.9, "api_endpoint": self.api_endpoint, "space_url": self.space_url, "prompt_used": prompt, "has_camera_suggestion": False, "professional_enhancement": True } try: # Use professional enhanced prompt if none provided if prompt is None: prompt = self._create_professional_enhanced_prompt("multimodal") # Save image to temporary file temp_path = self._save_temp_image(image) if not temp_path: return "Image processing failed", {"error": "Could not save image"} logger.info("Calling BAGEL API with professional cinematography context...") # Call BAGEL API with enhanced prompt result = self.client.predict( image=handle_file(temp_path), prompt=prompt, show_thinking=False, 
do_sample=False, text_temperature=0.2, max_new_tokens=512, api_name=self.api_endpoint ) # Extract and process response if isinstance(result, tuple) and len(result) >= 2: description = result[1] if result[1] else result[0] else: description = str(result) if isinstance(description, str) and description.strip(): description = description.strip() # Extract professional camera setup camera_setup = self._extract_professional_camera_setup(description) if camera_setup: metadata["camera_setup"] = camera_setup metadata["has_camera_suggestion"] = True logger.info(f"Professional camera setup extracted: {camera_setup}") else: metadata["has_camera_suggestion"] = False logger.info("No camera setup found, will use professional fallback") # Enhance description with cinematography context if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("knowledge_base_integration", True): description = self._enhance_description_with_professional_context(description, image) metadata["cinematography_context_applied"] = True else: description = "Professional image analysis completed successfully" metadata["has_camera_suggestion"] = False # Update metadata metadata.update({ "response_length": len(description), "analysis_type": "professional_enhanced" }) logger.info(f"BAGEL Professional analysis complete: {len(description)} chars, Camera: {metadata.get('has_camera_suggestion', False)}") return description, metadata except Exception as e: logger.error(f"BAGEL Professional analysis failed: {e}") return "Professional analysis failed", {"error": str(e), "model": "BAGEL-7B-Professional"} finally: if temp_path: self._cleanup_temp_file(temp_path) def analyze_for_cinematic_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]: """Analyze image specifically for cinematic/MIA TV Series prompt generation""" cinematic_prompt = self._create_professional_enhanced_prompt("cinematic") return self.analyze_image(image, cinematic_prompt) def analyze_for_flux_with_professional_context(self, image: Image.Image) -> Tuple[str, 
Dict[str, Any]]: """Analyze image for FLUX with enhanced professional cinematography context""" flux_prompt = self._create_professional_enhanced_prompt("flux_optimized") return self.analyze_image(image, flux_prompt) def analyze_for_multiengine_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]: """Analyze image for multi-engine compatibility (Flux, Midjourney, etc.)""" multiengine_prompt = self._create_professional_enhanced_prompt("multimodal") return self.analyze_image(image, multiengine_prompt) def cleanup(self) -> None: """Clean up API client resources""" try: if hasattr(self, 'client'): self.client = None super().cleanup() logger.info("BAGEL Professional API resources cleaned up") except Exception as e: logger.warning(f"BAGEL Professional API cleanup warning: {e}") class FallbackAnalyzer(BaseImageAnalyzer): """Enhanced fallback analyzer with basic professional cinematography principles""" def __init__(self): super().__init__() self.professional_analyzer = professional_analyzer def initialize(self) -> bool: """Fallback with cinematography enhancement is always ready""" self.is_initialized = True return True def analyze_image(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]: """Provide enhanced image description with cinematography context""" try: width, height = image.size mode = image.mode aspect_ratio = width / height # Enhanced scene detection if aspect_ratio > 1.5: orientation = "landscape" scene_type = "landscape" camera_suggestion = "Phase One XT with 24-70mm f/4 lens, landscape photography" elif aspect_ratio < 0.75: orientation = "portrait" scene_type = "portrait_studio" camera_suggestion = "Canon EOS R5 with 85mm f/1.4 lens, portrait photography" else: orientation = "square" scene_type = "general" camera_suggestion = "Canon EOS R6 with 50mm f/1.8 lens, standard photography" # Generate professional description description = f"A {orientation} format professional photograph with balanced composition and technical excellence. 
The image demonstrates clear visual hierarchy and professional execution, suitable for high-quality reproduction across multiple generative platforms. Recommended professional setup: {camera_suggestion}, with careful attention to exposure, lighting, and artistic composition." # Add cinematography context if available try: if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("enable_expert_analysis", True): enhanced_context = self.professional_analyzer.generate_enhanced_context(description) technical_context = enhanced_context.get("technical_context", "") if technical_context: description += f" Cinematography context: {technical_context}" except Exception as e: logger.warning(f"Cinematography context enhancement failed in fallback: {e}") metadata = { "model": "Professional-Fallback", "device": "cpu", "confidence": 0.7, "image_size": f"{width}x{height}", "color_mode": mode, "orientation": orientation, "aspect_ratio": round(aspect_ratio, 2), "scene_type": scene_type, "has_camera_suggestion": True, "camera_setup": camera_suggestion, "professional_enhancement": True } return description, metadata except Exception as e: logger.error(f"Professional fallback analysis failed: {e}") return "Professional image suitable for detailed analysis and multi-engine prompt generation", { "error": str(e), "model": "Professional-Fallback" } class ModelManager: """Enhanced manager for handling image analysis models with professional cinematography integration""" def __init__(self, preferred_model: str = "bagel-professional"): self.preferred_model = preferred_model self.analyzers = {} self.current_analyzer = None def get_analyzer(self, model_name: str = None) -> Optional[BaseImageAnalyzer]: """Get or create analyzer for specified model""" model_name = model_name or self.preferred_model if model_name not in self.analyzers: if model_name in ["bagel-api", "bagel-professional"]: self.analyzers[model_name] = BagelAPIAnalyzer() elif model_name == "fallback": self.analyzers[model_name] = FallbackAnalyzer() 
else: logger.warning(f"Unknown model: {model_name}, using professional fallback") model_name = "fallback" self.analyzers[model_name] = FallbackAnalyzer() return self.analyzers[model_name] def analyze_image(self, image: Image.Image, model_name: str = None, analysis_type: str = "multiengine") -> Tuple[str, Dict[str, Any]]: """Analyze image with professional cinematography enhancement""" analyzer = self.get_analyzer(model_name) if analyzer is None: return "No analyzer available", {"error": "Model not found"} # Choose analysis method based on type and analyzer capabilities if analysis_type == "cinematic" and hasattr(analyzer, 'analyze_for_cinematic_prompt'): success, result = safe_execute(analyzer.analyze_for_cinematic_prompt, image) elif analysis_type == "flux" and hasattr(analyzer, 'analyze_for_flux_with_professional_context'): success, result = safe_execute(analyzer.analyze_for_flux_with_professional_context, image) elif analysis_type == "multiengine" and hasattr(analyzer, 'analyze_for_multiengine_prompt'): success, result = safe_execute(analyzer.analyze_for_multiengine_prompt, image) else: success, result = safe_execute(analyzer.analyze_image, image) if success and result[1].get("error") is None: return result else: # Enhanced fallback with cinematography context logger.warning(f"Primary model failed, using cinematography-enhanced fallback: {result}") fallback_analyzer = self.get_analyzer("fallback") fallback_success, fallback_result = safe_execute(fallback_analyzer.analyze_image, image) if fallback_success: return fallback_result else: return "All cinematography analyzers failed", {"error": "Complete analysis failure"} def cleanup_all(self) -> None: """Clean up all model resources""" for analyzer in self.analyzers.values(): analyzer.cleanup() self.analyzers.clear() clean_memory() logger.info("All cinematography analyzers cleaned up") # Global model manager instance with cinematography enhancement model_manager = ModelManager(preferred_model="bagel-professional") 
def analyze_image(image: Image.Image, model_name: str = None, analysis_type: str = "multiengine") -> Tuple[str, Dict[str, Any]]:
    """
    Module-level shortcut that forwards to the global ``model_manager``.

    Args:
        image: PIL Image to analyze
        model_name: Optional model name ("bagel-professional", "fallback")
        analysis_type: Type of analysis ("multiengine", "cinematic", "flux")

    Returns:
        Tuple of (description, metadata) exactly as returned by
        ``model_manager.analyze_image``
    """
    return model_manager.analyze_image(
        image,
        model_name=model_name,
        analysis_type=analysis_type,
    )


# Export main components
__all__ = [
    "BaseImageAnalyzer",
    "BagelAPIAnalyzer",
    "FallbackAnalyzer",
    "ModelManager",
    "model_manager",
    "analyze_image",
]