Malaji71 committed on
Commit
d40d75f
·
verified ·
1 Parent(s): 2f71b86

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +252 -152
models.py CHANGED
@@ -1,6 +1,8 @@
1
  """
2
- Model management for Frame 0 Laboratory for MIA
3
- BAGEL 7B integration via API calls
 
 
4
  """
5
 
6
  import spaces
@@ -12,8 +14,13 @@ from typing import Optional, Dict, Any, Tuple
12
  from PIL import Image
13
  from gradio_client import Client, handle_file
14
 
15
- from config import get_device_config
16
  from utils import clean_memory, safe_execute
 
 
 
 
 
17
 
18
  logger = logging.getLogger(__name__)
19
 
@@ -39,14 +46,15 @@ class BaseImageAnalyzer:
39
 
40
 
41
  class BagelAPIAnalyzer(BaseImageAnalyzer):
42
- """BAGEL 7B model via API calls to working Space"""
43
 
44
  def __init__(self):
45
  super().__init__()
46
  self.client = None
47
  self.space_url = "Malaji71/Bagel-7B-Demo"
48
  self.api_endpoint = "/image_understanding"
49
- self.hf_token = os.getenv("HF_TOKEN") # Get token from environment/secrets
 
50
 
51
  def initialize(self) -> bool:
52
  """Initialize BAGEL API client with authentication"""
@@ -54,14 +62,14 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
54
  return True
55
 
56
  try:
57
- logger.info("Initializing BAGEL API client...")
58
 
59
- # Initialize client with token if available (for private spaces)
60
  if self.hf_token:
61
- logger.info("Using HF token for private space access")
62
  self.client = Client(self.space_url, hf_token=self.hf_token)
63
  else:
64
- logger.info("No HF token found, accessing public space")
65
  self.client = Client(self.space_url)
66
 
67
  self.is_initialized = True
@@ -70,66 +78,130 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
70
 
71
  except Exception as e:
72
  logger.error(f"BAGEL API client initialization failed: {e}")
73
- # If private space fails, try without token as fallback
74
  if self.hf_token:
75
  logger.info("Retrying without token...")
76
  try:
77
  self.client = Client(self.space_url)
78
  self.is_initialized = True
79
- logger.info("BAGEL API client initialized successfully (fallback to public)")
80
  return True
81
  except Exception as e2:
82
- logger.error(f"Fallback initialization also failed: {e2}")
83
  return False
84
 
85
- def _extract_camera_setup(self, description: str) -> Optional[str]:
86
- """Extract camera setup recommendation from BAGEL response with improved parsing"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  try:
88
- # Look for CAMERA_SETUP section first
 
 
89
  if "CAMERA_SETUP:" in description:
90
  parts = description.split("CAMERA_SETUP:")
91
  if len(parts) > 1:
92
  camera_section = parts[1].strip()
93
- # Take the first meaningful sentence from camera setup
94
  camera_text = camera_section.split('\n')[0].strip()
95
- if len(camera_text) > 20: # Ensure meaningful content
96
- return self._parse_camera_recommendation(camera_text)
97
 
98
- # Look for "2. CAMERA_SETUP" pattern
99
- if "2. CAMERA_SETUP" in description:
100
  parts = description.split("2. CAMERA_SETUP")
101
  if len(parts) > 1:
102
  camera_section = parts[1].strip()
103
  camera_text = camera_section.split('\n')[0].strip()
104
  if len(camera_text) > 20:
105
- return self._parse_camera_recommendation(camera_text)
106
 
107
- # Look for camera recommendations within the text
108
- camera_recommendation = self._find_camera_recommendation(description)
109
- if camera_recommendation:
110
- return camera_recommendation
111
 
112
- return None
113
 
114
  except Exception as e:
115
- logger.warning(f"Failed to extract camera setup: {e}")
116
  return None
117
 
118
- def _parse_camera_recommendation(self, camera_text: str) -> Optional[str]:
119
- """Parse and extract specific camera and lens information"""
120
  try:
121
- # Remove common prefixes and clean text
122
  camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE)
123
- camera_text = re.sub(r'^(using a|use a|cameras? like)\s*', '', camera_text, flags=re.IGNORECASE)
124
 
125
- # Extract camera model with specific patterns
126
  camera_patterns = [
127
- r'(Canon EOS [R\d]+[^\s,]*(?:\s+[IVX]+)?)',
128
- r'(Sony A[^\s,]+(?:\s+[IVX]+)?)',
129
  r'(Leica [^\s,]+)',
130
  r'(Hasselblad [^\s,]+)',
131
  r'(Phase One [^\s,]+)',
132
- r'(Fujifilm [^\s,]+)'
 
 
 
133
  ]
134
 
135
  camera_model = None
@@ -139,12 +211,14 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
139
  camera_model = match.group(1).strip()
140
  break
141
 
142
- # Extract lens information with improved patterns
143
  lens_patterns = [
144
- r'(\d+mm\s*f/[\d.]+(?:\s*lens)?)',
145
  r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)',
146
  r'(with\s+(?:a\s+)?(\d+mm[^,.]*))',
147
- r'(paired with.*?(\d+mm[^,.]*))'
 
 
148
  ]
149
 
150
  lens_info = None
@@ -155,14 +229,7 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
155
  lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE)
156
  break
157
 
158
- # Extract aperture if not in lens info
159
- if not lens_info or 'f/' not in lens_info:
160
- aperture_match = re.search(r'(f/[\d.]+)', camera_text)
161
- aperture = aperture_match.group(1) if aperture_match else None
162
- if aperture and lens_info:
163
- lens_info = f"{lens_info} {aperture}"
164
-
165
- # Build clean recommendation
166
  parts = []
167
  if camera_model:
168
  parts.append(camera_model)
@@ -171,44 +238,71 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
171
 
172
  if parts:
173
  result = ', '.join(parts)
174
- logger.info(f"Parsed camera recommendation: {result}")
175
  return result
176
 
177
  return None
178
 
179
  except Exception as e:
180
- logger.warning(f"Failed to parse camera recommendation: {e}")
181
  return None
182
 
183
- def _find_camera_recommendation(self, text: str) -> Optional[str]:
184
- """Find camera recommendations anywhere in the text"""
185
  try:
186
- # Look for sentences containing camera info
187
  sentences = re.split(r'[.!?]', text)
188
 
189
  for sentence in sentences:
190
- # Check if sentence contains camera info
191
- if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm']):
192
- if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens']):
193
- parsed = self._parse_camera_recommendation(sentence.strip())
194
  if parsed:
195
  return parsed
196
 
197
  return None
198
 
199
  except Exception as e:
200
- logger.warning(f"Failed to find camera recommendation: {e}")
201
  return None
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  def _save_temp_image(self, image: Image.Image) -> str:
204
  """Save image to temporary file for API call"""
205
  try:
206
- # Create temporary file
207
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
208
  temp_path = temp_file.name
209
  temp_file.close()
210
 
211
- # Save image
212
  if image.mode != 'RGB':
213
  image = image.convert('RGB')
214
  image.save(temp_path, 'PNG')
@@ -229,53 +323,37 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
229
 
230
  @spaces.GPU(duration=60)
231
  def analyze_image(self, image: Image.Image, prompt: str = None) -> Tuple[str, Dict[str, Any]]:
232
- """Analyze image using BAGEL API"""
233
  if not self.is_initialized:
234
  success = self.initialize()
235
  if not success:
236
  return "BAGEL API not available", {"error": "API initialization failed"}
237
 
238
  temp_path = None
239
-
240
- # Initialize metadata early
241
  metadata = {
242
- "model": "BAGEL-7B-API",
243
  "device": "api",
244
  "confidence": 0.9,
245
  "api_endpoint": self.api_endpoint,
246
  "space_url": self.space_url,
247
  "prompt_used": prompt,
248
- "has_camera_suggestion": False
 
249
  }
250
 
251
  try:
252
- # Enhanced prompt for better structured output
253
  if prompt is None:
254
- prompt = """Analyze this image for professional photography reproduction. Provide exactly two sections:
255
-
256
- 1. DESCRIPTION: Write a single flowing paragraph describing what you see. Start directly with the subject (e.g., "A color photograph showing..." or "A black and white image depicting..."). Include:
257
- - Image type (photograph, illustration, artwork)
258
- - Subject and composition
259
- - Color palette and lighting conditions
260
- - Mood and atmosphere
261
- - Photographic style and format
262
-
263
- 2. CAMERA_SETUP: Based on the scene type you observe, recommend ONE specific professional camera and lens combination:
264
- - For street/documentary scenes: Canon EOS R6 with 35mm f/1.4 lens
265
- - For portrait photography: Canon EOS R5 with 85mm f/1.4 lens
266
- - For landscape photography: Phase One XT with 24-70mm f/4 lens
267
- - For action/sports: Sony A1 with 70-200mm f/2.8 lens
268
-
269
- Give only the camera model and lens specification, nothing else."""
270
 
271
  # Save image to temporary file
272
  temp_path = self._save_temp_image(image)
273
  if not temp_path:
274
  return "Image processing failed", {"error": "Could not save image"}
275
 
276
- logger.info("Calling BAGEL API for image analysis...")
277
 
278
- # Call BAGEL API
279
  result = self.client.predict(
280
  image=handle_file(temp_path),
281
  prompt=prompt,
@@ -286,132 +364,151 @@ Give only the camera model and lens specification, nothing else."""
286
  api_name=self.api_endpoint
287
  )
288
 
289
- # Extract response (API returns tuple: (image_result, text_response))
290
  if isinstance(result, tuple) and len(result) >= 2:
291
  description = result[1] if result[1] else result[0]
292
  else:
293
  description = str(result)
294
 
295
- # Process the description and extract camera setup
296
  if isinstance(description, str) and description.strip():
297
  description = description.strip()
298
 
299
- # Extract camera setup with improved parsing
300
- camera_setup = self._extract_camera_setup(description)
301
  if camera_setup:
302
  metadata["camera_setup"] = camera_setup
303
  metadata["has_camera_suggestion"] = True
304
- logger.info(f"Extracted camera setup: {camera_setup}")
305
  else:
306
  metadata["has_camera_suggestion"] = False
307
- logger.warning("No valid camera setup found in BAGEL response")
 
 
 
 
 
 
308
  else:
309
- description = "Detailed image analysis completed successfully"
310
  metadata["has_camera_suggestion"] = False
311
 
312
- # Update final metadata
313
  metadata.update({
314
- "response_length": len(description)
 
315
  })
316
 
317
- logger.info(f"BAGEL API analysis complete: {len(description)} characters, Camera: {metadata.get('has_camera_suggestion', False)}")
318
  return description, metadata
319
 
320
  except Exception as e:
321
- logger.error(f"BAGEL API analysis failed: {e}")
322
- return "API analysis failed", {"error": str(e), "model": "BAGEL-7B-API"}
323
 
324
  finally:
325
- # Always cleanup temporary file
326
  if temp_path:
327
  self._cleanup_temp_file(temp_path)
328
 
329
def analyze_for_flux_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
    """Run ``analyze_image`` with the fixed FLUX-oriented instruction prompt.

    Args:
        image: Image to analyze.

    Returns:
        Whatever ``analyze_image`` returns for this image and prompt.
    """
    # The prompt requests exactly two sections (DESCRIPTION, CAMERA_SETUP).
    instructions = """Analyze this image for professional FLUX generation. Provide exactly two sections:

1. DESCRIPTION: Create a single flowing paragraph starting directly with the subject. Be precise about:
- Image type (photograph, illustration, artwork)
- Subject matter and composition
- Color palette (specific colors, warm/cool tones, monochrome)
- Lighting conditions and photographic style
- Mood, atmosphere, and artistic elements

2. CAMERA_SETUP: Recommend ONE specific professional camera and lens for this scene type:
- Street/urban/documentary: Canon EOS R6 with 35mm f/1.4 lens
- Portrait photography: Canon EOS R5 with 85mm f/1.4 lens
- Landscape photography: Phase One XT with 24-70mm f/4 lens
- Action/sports: Sony A1 with 70-200mm f/2.8 lens

Give only the camera model and exact lens specification."""

    return self.analyze_image(image, instructions)
349
 
 
 
 
 
 
350
def cleanup(self) -> None:
    """Drop the API client reference, then run the parent class cleanup.

    Any exception raised on the way is logged as a warning instead of being
    propagated.
    """
    try:
        # The attribute check keeps cleanup safe if ``client`` was never set.
        if hasattr(self, 'client'):
            self.client = None
        super().cleanup()
        logger.info("BAGEL API resources cleaned up")
    except Exception as cleanup_error:
        logger.warning(f"BAGEL API cleanup warning: {cleanup_error}")
359
 
360
 
361
class FallbackAnalyzer(BaseImageAnalyzer):
    """Geometry-only analyzer used when the BAGEL API is not available."""

    def __init__(self):
        super().__init__()

    def initialize(self) -> bool:
        """Mark the analyzer as initialized; there is nothing to load."""
        self.is_initialized = True
        return True

    @staticmethod
    def _orientation_profile(aspect_ratio: float) -> Tuple[str, str]:
        """Map a width/height ratio to (orientation label, camera suggestion)."""
        if aspect_ratio > 1.5:
            return "landscape", "wide-angle lens, landscape photography"
        if aspect_ratio < 0.75:
            return "portrait", "portrait lens, shallow depth of field"
        return "square", "standard lens, balanced composition"

    def analyze_image(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
        """Describe an image from its size and colour mode alone.

        Args:
            image: Image whose ``size`` and ``mode`` are inspected.

        Returns:
            ``(description, metadata)``. If anything fails, a generic
            description is returned with the error text in the metadata.
        """
        try:
            width, height = image.size
            mode = image.mode
            aspect_ratio = width / height

            orientation, camera_suggestion = self._orientation_profile(aspect_ratio)

            description = f"A {orientation} format image with professional composition. The image shows clear detail and good visual balance, suitable for high-quality reproduction. Recommended camera setup: {camera_suggestion}, professional lighting with careful attention to exposure and color balance."

            metadata = {
                "model": "Fallback",
                "device": "cpu",
                "confidence": 0.6,
                "image_size": f"{width}x{height}",
                "color_mode": mode,
                "orientation": orientation,
                "aspect_ratio": round(aspect_ratio, 2),
            }
            return description, metadata

        except Exception as analysis_error:
            logger.error(f"Fallback analysis failed: {analysis_error}")
            return "Professional image suitable for detailed analysis and prompt generation", {"error": str(analysis_error), "model": "Fallback"}
 
 
 
409
 
410
 
411
  class ModelManager:
412
- """Manager for handling image analysis models"""
413
 
414
- def __init__(self, preferred_model: str = "bagel-api"):
415
  self.preferred_model = preferred_model
416
  self.analyzers = {}
417
  self.current_analyzer = None
@@ -421,42 +518,45 @@ class ModelManager:
421
  model_name = model_name or self.preferred_model
422
 
423
  if model_name not in self.analyzers:
424
- if model_name == "bagel-api":
425
  self.analyzers[model_name] = BagelAPIAnalyzer()
426
  elif model_name == "fallback":
427
  self.analyzers[model_name] = FallbackAnalyzer()
428
  else:
429
- logger.warning(f"Unknown model: {model_name}, using fallback")
430
  model_name = "fallback"
431
  self.analyzers[model_name] = FallbackAnalyzer()
432
 
433
  return self.analyzers[model_name]
434
 
435
def analyze_image(self, image: Image.Image, model_name: str = None, analysis_type: str = "detailed") -> Tuple[str, Dict[str, Any]]:
    """Analyze *image* with the requested analyzer, retrying with the fallback.

    Args:
        image: Image to analyze.
        model_name: Analyzer key passed to ``get_analyzer``.
        analysis_type: ``"flux"`` selects ``analyze_for_flux_prompt`` when the
            analyzer provides it; anything else uses ``analyze_image``.

    Returns:
        ``(description, metadata)`` from the primary analyzer when it
        succeeds without an ``"error"`` entry, otherwise from the fallback
        analyzer, otherwise a fixed failure tuple.
    """
    primary = self.get_analyzer(model_name)
    if primary is None:
        return "No analyzer available", {"error": "Model not found"}

    # Pick the entry point once, then run it through the safe_execute wrapper.
    wants_flux = analysis_type == "flux" and hasattr(primary, 'analyze_for_flux_prompt')
    entry_point = primary.analyze_for_flux_prompt if wants_flux else primary.analyze_image
    success, result = safe_execute(entry_point, image)

    if success and result[1].get("error") is None:
        return result

    logger.warning(f"Primary model failed, using fallback: {result}")
    fallback_analyzer = self.get_analyzer("fallback")
    fallback_success, fallback_result = safe_execute(fallback_analyzer.analyze_image, image)
    if not fallback_success:
        return "All analyzers failed", {"error": "Complete analysis failure"}
    return fallback_result
460
 
461
  def cleanup_all(self) -> None:
462
  """Clean up all model resources"""
@@ -464,24 +564,24 @@ class ModelManager:
464
  analyzer.cleanup()
465
  self.analyzers.clear()
466
  clean_memory()
467
- logger.info("All analyzers cleaned up")
468
 
469
 
470
- # Global model manager instance
471
- model_manager = ModelManager(preferred_model="bagel-api")
472
 
473
 
474
def analyze_image(image: Image.Image, model_name: str = None, analysis_type: str = "detailed") -> Tuple[str, Dict[str, Any]]:
    """
    Module-level shortcut that delegates to the global ``model_manager``.

    Args:
        image: PIL Image to analyze
        model_name: Optional model name ("bagel-api" or "fallback")
        analysis_type: Type of analysis ("detailed" or "flux")

    Returns:
        Tuple of (description, metadata)
    """
    return model_manager.analyze_image(
        image,
        model_name=model_name,
        analysis_type=analysis_type,
    )
487
 
 
1
  """
2
+ Model management for Phramer AI
3
+ By Pariente AI, for MIA TV Series
4
+
5
+ BAGEL 7B integration with professional photography knowledge enhancement
6
  """
7
 
8
  import spaces
 
14
  from PIL import Image
15
  from gradio_client import Client, handle_file
16
 
17
+ from config import get_device_config, PROFESSIONAL_PHOTOGRAPHY_CONFIG
18
  from utils import clean_memory, safe_execute
19
+ from professional_photography import (
20
+ ProfessionalPhotoAnalyzer,
21
+ enhance_flux_prompt_with_professional_knowledge,
22
+ professional_analyzer
23
+ )
24
 
25
  logger = logging.getLogger(__name__)
26
 
 
46
 
47
 
48
  class BagelAPIAnalyzer(BaseImageAnalyzer):
49
+ """BAGEL 7B model with professional photography knowledge integration"""
50
 
51
  def __init__(self):
52
  super().__init__()
53
  self.client = None
54
  self.space_url = "Malaji71/Bagel-7B-Demo"
55
  self.api_endpoint = "/image_understanding"
56
+ self.hf_token = os.getenv("HF_TOKEN")
57
+ self.professional_analyzer = professional_analyzer
58
 
59
  def initialize(self) -> bool:
60
  """Initialize BAGEL API client with authentication"""
 
62
  return True
63
 
64
  try:
65
+ logger.info("Initializing BAGEL API client for Phramer AI...")
66
 
67
+ # Initialize client with token if available
68
  if self.hf_token:
69
+ logger.info("Using HF token for enhanced API access")
70
  self.client = Client(self.space_url, hf_token=self.hf_token)
71
  else:
72
+ logger.info("Using public API access")
73
  self.client = Client(self.space_url)
74
 
75
  self.is_initialized = True
 
78
 
79
  except Exception as e:
80
  logger.error(f"BAGEL API client initialization failed: {e}")
 
81
  if self.hf_token:
82
  logger.info("Retrying without token...")
83
  try:
84
  self.client = Client(self.space_url)
85
  self.is_initialized = True
86
+ logger.info("BAGEL API client initialized (fallback mode)")
87
  return True
88
  except Exception as e2:
89
+ logger.error(f"Fallback initialization failed: {e2}")
90
  return False
91
 
92
+ def _create_professional_enhanced_prompt(self, analysis_type: str = "multimodal") -> str:
93
+ """Create professionally enhanced prompt for BAGEL analysis"""
94
+
95
+ if analysis_type == "cinematic":
96
+ return """Analyze this image for professional cinematic prompt generation. You are an expert cinematographer with 30+ years of cinema experience. Provide exactly two sections:
97
+
98
+ 1. DESCRIPTION: Create a detailed, flowing paragraph describing the image for cinematic reproduction:
99
+ - Scene composition and visual storytelling elements
100
+ - Lighting quality, direction, and dramatic mood
101
+ - Color palette, tonal relationships, and atmospheric elements
102
+ - Subject positioning, environmental context, and framing
103
+ - Cinematic qualities: film grain, depth of field, visual style
104
+ - Technical photographic elements that enhance realism
105
+
106
+ 2. CAMERA_SETUP: Recommend professional cinema/photography equipment based on scene analysis:
107
+ - Camera body: Choose from Canon EOS R5/R6, Sony A7R/A1, Leica M11, ARRI Alexa, RED cameras
108
+ - Lens: Specific focal length and aperture (e.g., "85mm f/1.4", "35mm anamorphic f/2.8")
109
+ - Technical settings: Aperture consideration for depth of field and story mood
110
+ - Lighting setup: Professional lighting rationale (key, fill, rim, practical lights)
111
+ - Shooting style: Documentary, portrait, landscape, architectural, or cinematic approach
112
+
113
+ Apply professional cinematography principles: rule of thirds, leading lines, depth layering, lighting direction for mood, and technical excellence. Focus on creating prompts optimized for photorealistic, cinema-quality generation."""
114
+
115
+ elif analysis_type == "flux_optimized":
116
+ return """Analyze this image for FLUX prompt generation with professional cinematography expertise. You have 30+ years of cinema experience. Provide exactly two sections:
117
+
118
+ 1. DESCRIPTION: Professional analysis for photorealistic reproduction:
119
+ - Image type and photographic classification
120
+ - Subject matter with precise visual details
121
+ - Lighting analysis: quality, direction, color temperature, shadows
122
+ - Composition elements: framing, balance, visual flow
123
+ - Color relationships and tonal values
124
+ - Artistic style and photographic technique employed
125
+ - Technical qualities that contribute to image impact
126
+
127
+ 2. CAMERA_SETUP: Expert equipment recommendation:
128
+ - Professional camera body suited for scene type
129
+ - Specific lens with focal length and maximum aperture
130
+ - Recommended shooting aperture for optimal depth of field
131
+ - Technical considerations: ISO, lighting setup, focus technique
132
+ - Professional shooting approach and methodology
133
+
134
+ Integrate advanced cinematography principles: exposure triangle mastery, lighting ratios, compositional rules, focus techniques, and professional equipment knowledge. Output should be optimized for FLUX's photorealistic capabilities."""
135
+
136
+ else: # multimodal analysis
137
+ return """Analyze this image with professional cinematography expertise for multi-platform prompt generation. You are a master cinematographer with extensive technical and artistic knowledge from 30+ years in cinema. Provide exactly two sections:
138
+
139
+ 1. DESCRIPTION: Expert visual analysis for prompt generation:
140
+ - Comprehensive scene description with photographic insight
141
+ - Subject matter, composition, and visual hierarchy
142
+ - Lighting analysis: quality, direction, mood, technical setup
143
+ - Color palette, contrast, and tonal relationships
144
+ - Artistic elements: style, mood, atmosphere, visual impact
145
+ - Technical photographic qualities and execution
146
+
147
+ 2. CAMERA_SETUP: Professional equipment and technique recommendation:
148
+ - Camera system recommendation based on scene requirements
149
+ - Lens selection with specific focal length and aperture range
150
+ - Technical shooting parameters and considerations
151
+ - Lighting setup and methodology for scene recreation
152
+ - Professional approach: shooting style and technical execution
153
+
154
+ Apply master-level cinematography knowledge: advanced composition techniques, professional lighting principles, camera system expertise, lens characteristics, and technical excellence. Create content suitable for multiple generative engines (Flux, Midjourney, etc.) with emphasis on photorealistic quality."""
155
+
156
+ def _extract_professional_camera_setup(self, description: str) -> Optional[str]:
157
+ """Extract and enhance camera setup with professional photography knowledge"""
158
  try:
159
+ camera_setup = None
160
+
161
+ # Extract BAGEL's camera recommendation
162
  if "CAMERA_SETUP:" in description:
163
  parts = description.split("CAMERA_SETUP:")
164
  if len(parts) > 1:
165
  camera_section = parts[1].strip()
 
166
  camera_text = camera_section.split('\n')[0].strip()
167
+ if len(camera_text) > 20:
168
+ camera_setup = self._parse_professional_camera_recommendation(camera_text)
169
 
170
+ elif "2. CAMERA_SETUP" in description:
 
171
  parts = description.split("2. CAMERA_SETUP")
172
  if len(parts) > 1:
173
  camera_section = parts[1].strip()
174
  camera_text = camera_section.split('\n')[0].strip()
175
  if len(camera_text) > 20:
176
+ camera_setup = self._parse_professional_camera_recommendation(camera_text)
177
 
178
+ # Fallback: look for camera recommendations in text
179
+ if not camera_setup:
180
+ camera_setup = self._find_professional_camera_recommendation(description)
 
181
 
182
+ return camera_setup
183
 
184
  except Exception as e:
185
+ logger.warning(f"Failed to extract professional camera setup: {e}")
186
  return None
187
 
188
+ def _parse_professional_camera_recommendation(self, camera_text: str) -> Optional[str]:
189
+ """Parse camera recommendation with professional photography enhancement"""
190
  try:
191
+ # Clean and extract with professional patterns
192
  camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE)
 
193
 
194
+ # Professional camera patterns (more comprehensive)
195
  camera_patterns = [
196
+ r'(Canon EOS R[^\s,]*(?:\s+[^\s,]*)?)',
197
+ r'(Sony A[^\s,]*(?:\s+[^\s,]*)?)',
198
  r'(Leica [^\s,]+)',
199
  r'(Hasselblad [^\s,]+)',
200
  r'(Phase One [^\s,]+)',
201
+ r'(Fujifilm [^\s,]+)',
202
+ r'(ARRI [^\s,]+)',
203
+ r'(RED [^\s,]+)',
204
+ r'(Nikon [^\s,]+)'
205
  ]
206
 
207
  camera_model = None
 
211
  camera_model = match.group(1).strip()
212
  break
213
 
214
+ # Professional lens patterns (enhanced)
215
  lens_patterns = [
216
+ r'(\d+mm\s*f/[\d.]+(?:\s*(?:lens|anamorphic|telephoto|wide))?)',
217
  r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)',
218
  r'(with\s+(?:a\s+)?(\d+mm[^,.]*))',
219
+ r'(paired with.*?(\d+mm[^,.]*))',
220
+ r'(\d+mm[^,]*anamorphic[^,]*)',
221
+ r'(\d+mm[^,]*telephoto[^,]*)'
222
  ]
223
 
224
  lens_info = None
 
229
  lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE)
230
  break
231
 
232
+ # Build professional recommendation
 
 
 
 
 
 
 
233
  parts = []
234
  if camera_model:
235
  parts.append(camera_model)
 
238
 
239
  if parts:
240
  result = ', '.join(parts)
241
+ logger.info(f"Professional camera setup extracted: {result}")
242
  return result
243
 
244
  return None
245
 
246
  except Exception as e:
247
+ logger.warning(f"Failed to parse professional camera recommendation: {e}")
248
  return None
249
 
250
+ def _find_professional_camera_recommendation(self, text: str) -> Optional[str]:
251
+ """Find professional camera recommendations with enhanced detection"""
252
  try:
 
253
  sentences = re.split(r'[.!?]', text)
254
 
255
  for sentence in sentences:
256
+ # Professional camera brands and technical terms
257
+ if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm', 'arri', 'red']):
258
+ if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens', 'shot on']):
259
+ parsed = self._parse_professional_camera_recommendation(sentence.strip())
260
  if parsed:
261
  return parsed
262
 
263
  return None
264
 
265
  except Exception as e:
266
+ logger.warning(f"Failed to find professional camera recommendation: {e}")
267
  return None
268
 
269
def _enhance_description_with_professional_context(self, description: str, image: Image.Image) -> str:
    """Append a "Professional Context" paragraph to a description when useful.

    Nothing is appended when expert analysis is disabled in
    ``PROFESSIONAL_PHOTOGRAPHY_CONFIG``, when the technical context is empty
    or 20 characters or shorter, or when the description already mentions
    "shot on", "professional" or "camera".

    Args:
        description: Text returned by the model.
        image: Accepted for interface compatibility; not used here.

    Returns:
        The description, possibly extended with the technical context. On any
        error the original description is returned and a warning is logged.
    """
    try:
        if not PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("enable_expert_analysis", True):
            return description

        # NOTE(review): generate_enhanced_context is assumed to return a
        # dict-like object; only .get() is used on it here.
        enhanced_context = self.professional_analyzer.generate_enhanced_context(description)
        scene_type = enhanced_context.get("scene_type", "general")
        technical_context = enhanced_context.get("technical_context", "")

        if not technical_context or len(technical_context) <= 20:
            return description

        # Skip the addition when the description already carries this kind of
        # information, to avoid duplicating it.
        lowered = description.lower()
        if any(term in lowered for term in ("shot on", "professional", "camera")):
            return description

        # Logged only when text is really appended, so the log stays truthful.
        logger.info(f"Enhanced description with cinematography context for {scene_type} scene")
        return f"{description}\n\nProfessional Context: {technical_context}"

    except Exception as e:
        logger.warning(f"Cinematography context enhancement failed: {e}")
        return description
298
+
299
  def _save_temp_image(self, image: Image.Image) -> str:
300
  """Save image to temporary file for API call"""
301
  try:
 
302
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png')
303
  temp_path = temp_file.name
304
  temp_file.close()
305
 
 
306
  if image.mode != 'RGB':
307
  image = image.convert('RGB')
308
  image.save(temp_path, 'PNG')
 
323
 
324
  @spaces.GPU(duration=60)
325
  def analyze_image(self, image: Image.Image, prompt: str = None) -> Tuple[str, Dict[str, Any]]:
326
+ """Analyze image using BAGEL API with professional cinematography enhancement"""
327
  if not self.is_initialized:
328
  success = self.initialize()
329
  if not success:
330
  return "BAGEL API not available", {"error": "API initialization failed"}
331
 
332
  temp_path = None
 
 
333
  metadata = {
334
+ "model": "BAGEL-7B-Professional",
335
  "device": "api",
336
  "confidence": 0.9,
337
  "api_endpoint": self.api_endpoint,
338
  "space_url": self.space_url,
339
  "prompt_used": prompt,
340
+ "has_camera_suggestion": False,
341
+ "professional_enhancement": True
342
  }
343
 
344
  try:
345
+ # Use professional enhanced prompt if none provided
346
  if prompt is None:
347
+ prompt = self._create_professional_enhanced_prompt("multimodal")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
  # Save image to temporary file
350
  temp_path = self._save_temp_image(image)
351
  if not temp_path:
352
  return "Image processing failed", {"error": "Could not save image"}
353
 
354
+ logger.info("Calling BAGEL API with professional cinematography context...")
355
 
356
+ # Call BAGEL API with enhanced prompt
357
  result = self.client.predict(
358
  image=handle_file(temp_path),
359
  prompt=prompt,
 
364
  api_name=self.api_endpoint
365
  )
366
 
367
+ # Extract and process response
368
  if isinstance(result, tuple) and len(result) >= 2:
369
  description = result[1] if result[1] else result[0]
370
  else:
371
  description = str(result)
372
 
 
373
  if isinstance(description, str) and description.strip():
374
  description = description.strip()
375
 
376
+ # Extract professional camera setup
377
+ camera_setup = self._extract_professional_camera_setup(description)
378
  if camera_setup:
379
  metadata["camera_setup"] = camera_setup
380
  metadata["has_camera_suggestion"] = True
381
+ logger.info(f"Professional camera setup extracted: {camera_setup}")
382
  else:
383
  metadata["has_camera_suggestion"] = False
384
+ logger.info("No camera setup found, will use professional fallback")
385
+
386
+ # Enhance description with cinematography context
387
+ if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("knowledge_base_integration", True):
388
+ description = self._enhance_description_with_professional_context(description, image)
389
+ metadata["cinematography_context_applied"] = True
390
+
391
  else:
392
+ description = "Professional image analysis completed successfully"
393
  metadata["has_camera_suggestion"] = False
394
 
395
+ # Update metadata
396
  metadata.update({
397
+ "response_length": len(description),
398
+ "analysis_type": "professional_enhanced"
399
  })
400
 
401
+ logger.info(f"BAGEL Professional analysis complete: {len(description)} chars, Camera: {metadata.get('has_camera_suggestion', False)}")
402
  return description, metadata
403
 
404
  except Exception as e:
405
+ logger.error(f"BAGEL Professional analysis failed: {e}")
406
+ return "Professional analysis failed", {"error": str(e), "model": "BAGEL-7B-Professional"}
407
 
408
  finally:
 
409
  if temp_path:
410
  self._cleanup_temp_file(temp_path)
411
 
412
+ def analyze_for_cinematic_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
413
+ """Analyze image specifically for cinematic/MIA TV Series prompt generation"""
414
+ cinematic_prompt = self._create_professional_enhanced_prompt("cinematic")
415
+ return self.analyze_image(image, cinematic_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
416
 
417
+ def analyze_for_flux_with_professional_context(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
418
+ """Analyze image for FLUX with enhanced professional cinematography context"""
419
+ flux_prompt = self._create_professional_enhanced_prompt("flux_optimized")
420
  return self.analyze_image(image, flux_prompt)
421
 
422
+ def analyze_for_multiengine_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
423
+ """Analyze image for multi-engine compatibility (Flux, Midjourney, etc.)"""
424
+ multiengine_prompt = self._create_professional_enhanced_prompt("multimodal")
425
+ return self.analyze_image(image, multiengine_prompt)
426
+
427
  def cleanup(self) -> None:
428
  """Clean up API client resources"""
429
  try:
430
  if hasattr(self, 'client'):
431
  self.client = None
432
  super().cleanup()
433
+ logger.info("BAGEL Professional API resources cleaned up")
434
  except Exception as e:
435
+ logger.warning(f"BAGEL Professional API cleanup warning: {e}")
436
 
437
 
438
class FallbackAnalyzer(BaseImageAnalyzer):
    """Model-free analyzer that describes an image from its dimensions alone.

    No remote model is contacted: the description and camera suggestion are
    chosen purely from the image's width/height ratio, then optionally
    extended with text from the shared ``professional_analyzer``.
    """

    def __init__(self):
        """Set up base state and keep a handle on the shared professional analyzer."""
        super().__init__()
        self.professional_analyzer = professional_analyzer

    def initialize(self) -> bool:
        """Mark the analyzer as initialized; there is nothing to load.

        Returns:
            Always ``True``.
        """
        self.is_initialized = True
        return True

    def _append_cinematography_context(self, description: str) -> str:
        """Return ``description`` extended with technical context when available.

        Enrichment is best-effort: if it is disabled in
        ``PROFESSIONAL_PHOTOGRAPHY_CONFIG`` or anything raises, the text
        accumulated so far is returned and a warning is logged.
        """
        try:
            if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("enable_expert_analysis", True):
                context = self.professional_analyzer.generate_enhanced_context(description)
                technical = context.get("technical_context", "")
                if technical:
                    description += f" Cinematography context: {technical}"
        except Exception as exc:
            logger.warning(f"Cinematography context enhancement failed in fallback: {exc}")
        return description

    def analyze_image(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
        """Build a generic description and metadata from the image geometry.

        The width/height ratio selects one of three profiles:
        ratio > 1.5 is "landscape", ratio < 0.75 is "portrait", and everything
        in between is labelled "square".

        Args:
            image: PIL image; only ``size`` and ``mode`` are read.

        Returns:
            ``(description, metadata)``. On any exception (including a
            zero-height image) a fixed description is returned together with
            metadata containing an ``"error"`` key.
        """
        try:
            width, height = image.size
            mode = image.mode
            aspect_ratio = width / height

            # Each profile is (orientation, scene_type, camera_suggestion).
            if aspect_ratio > 1.5:
                profile = (
                    "landscape",
                    "landscape",
                    "Phase One XT with 24-70mm f/4 lens, landscape photography",
                )
            elif aspect_ratio < 0.75:
                profile = (
                    "portrait",
                    "portrait_studio",
                    "Canon EOS R5 with 85mm f/1.4 lens, portrait photography",
                )
            else:
                profile = (
                    "square",
                    "general",
                    "Canon EOS R6 with 50mm f/1.8 lens, standard photography",
                )
            orientation, scene_type, camera_suggestion = profile

            base_description = (
                f"A {orientation} format professional photograph with balanced composition and technical excellence. "
                "The image demonstrates clear visual hierarchy and professional execution, suitable for high-quality reproduction across multiple generative platforms. "
                f"Recommended professional setup: {camera_suggestion}, with careful attention to exposure, lighting, and artistic composition."
            )
            description = self._append_cinematography_context(base_description)

            metadata = {
                "model": "Professional-Fallback",
                "device": "cpu",
                "confidence": 0.7,
                "image_size": f"{width}x{height}",
                "color_mode": mode,
                "orientation": orientation,
                "aspect_ratio": round(aspect_ratio, 2),
                "scene_type": scene_type,
                "has_camera_suggestion": True,
                "camera_setup": camera_suggestion,
                "professional_enhancement": True,
            }
            return description, metadata

        except Exception as exc:
            logger.error(f"Professional fallback analysis failed: {exc}")
            failure_metadata = {
                "error": str(exc),
                "model": "Professional-Fallback",
            }
            return (
                "Professional image suitable for detailed analysis and multi-engine prompt generation",
                failure_metadata,
            )
506
 
507
 
508
  class ModelManager:
509
+ """Enhanced manager for handling image analysis models with professional cinematography integration"""
510
 
511
+ def __init__(self, preferred_model: str = "bagel-professional"):
512
  self.preferred_model = preferred_model
513
  self.analyzers = {}
514
  self.current_analyzer = None
 
518
  model_name = model_name or self.preferred_model
519
 
520
  if model_name not in self.analyzers:
521
+ if model_name in ["bagel-api", "bagel-professional"]:
522
  self.analyzers[model_name] = BagelAPIAnalyzer()
523
  elif model_name == "fallback":
524
  self.analyzers[model_name] = FallbackAnalyzer()
525
  else:
526
+ logger.warning(f"Unknown model: {model_name}, using professional fallback")
527
  model_name = "fallback"
528
  self.analyzers[model_name] = FallbackAnalyzer()
529
 
530
  return self.analyzers[model_name]
531
 
532
+ def analyze_image(self, image: Image.Image, model_name: str = None, analysis_type: str = "multiengine") -> Tuple[str, Dict[str, Any]]:
533
+ """Analyze image with professional cinematography enhancement"""
 
534
  analyzer = self.get_analyzer(model_name)
535
  if analyzer is None:
536
  return "No analyzer available", {"error": "Model not found"}
537
 
538
+ # Choose analysis method based on type and analyzer capabilities
539
+ if analysis_type == "cinematic" and hasattr(analyzer, 'analyze_for_cinematic_prompt'):
540
+ success, result = safe_execute(analyzer.analyze_for_cinematic_prompt, image)
541
+ elif analysis_type == "flux" and hasattr(analyzer, 'analyze_for_flux_with_professional_context'):
542
+ success, result = safe_execute(analyzer.analyze_for_flux_with_professional_context, image)
543
+ elif analysis_type == "multiengine" and hasattr(analyzer, 'analyze_for_multiengine_prompt'):
544
+ success, result = safe_execute(analyzer.analyze_for_multiengine_prompt, image)
545
  else:
546
  success, result = safe_execute(analyzer.analyze_image, image)
547
 
548
  if success and result[1].get("error") is None:
549
  return result
550
  else:
551
+ # Enhanced fallback with cinematography context
552
+ logger.warning(f"Primary model failed, using cinematography-enhanced fallback: {result}")
553
  fallback_analyzer = self.get_analyzer("fallback")
554
  fallback_success, fallback_result = safe_execute(fallback_analyzer.analyze_image, image)
555
 
556
  if fallback_success:
557
  return fallback_result
558
  else:
559
+ return "All cinematography analyzers failed", {"error": "Complete analysis failure"}
560
 
561
  def cleanup_all(self) -> None:
562
  """Clean up all model resources"""
 
564
  analyzer.cleanup()
565
  self.analyzers.clear()
566
  clean_memory()
567
+ logger.info("All cinematography analyzers cleaned up")
568
 
569
 
570
# Shared module-level ModelManager instance; the module-level analyze_image()
# convenience function delegates to it. Its preferred model is "bagel-professional".
model_manager = ModelManager(preferred_model="bagel-professional")
572
 
573
 
574
def analyze_image(image: Image.Image, model_name: str = None, analysis_type: str = "multiengine") -> Tuple[str, Dict[str, Any]]:
    """
    Convenience wrapper that analyzes an image through the shared ``model_manager``

    Args:
        image: PIL Image to analyze
        model_name: Optional analyzer key (e.g. "bagel-professional", "fallback");
            when omitted the manager's preferred model is used
        analysis_type: Type of analysis ("multiengine", "cinematic", "flux")

    Returns:
        Tuple of (description, metadata) as returned by ``ModelManager.analyze_image``
    """
    outcome = model_manager.analyze_image(image, model_name, analysis_type)
    return outcome
587