Malaji71 committed on
Commit
7db5a7a
·
verified ·
1 Parent(s): ad6905a

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +278 -532
utils.py CHANGED
@@ -142,525 +142,300 @@ def apply_flux_rules(prompt: str, analysis_metadata: Optional[Dict[str, Any]] =
142
  if not prompt or not isinstance(prompt, str):
143
  return ""
144
 
145
- # Clean the prompt from unwanted elements
146
- cleaned_prompt = prompt
147
- for pattern in FLUX_RULES["remove_patterns"]:
148
- cleaned_prompt = re.sub(pattern, '', cleaned_prompt, flags=re.IGNORECASE)
149
-
150
- # Extract description part only (remove CAMERA_SETUP section if present)
151
- description_part = _extract_description_only(cleaned_prompt)
152
-
153
- # NEW: Convert to generative language with cinematography angle detection
154
- if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_condensation", True):
155
- description_part = _convert_to_cinematographic_language(description_part)
156
- logger.info("Applied cinematographic language conversion")
157
-
158
- # Check if BAGEL provided intelligent camera setup with cinematography context
159
- camera_config = ""
160
- scene_type = "default"
161
-
162
- if analysis_metadata and analysis_metadata.get("has_camera_suggestion") and analysis_metadata.get("camera_setup"):
163
- # Use BAGEL's intelligent camera suggestion - enhanced with cinematography knowledge
164
- bagel_camera = analysis_metadata["camera_setup"]
165
- scene_type = detect_scene_type_from_analysis(analysis_metadata)
166
- camera_config = _format_professional_camera_suggestion(bagel_camera, scene_type)
167
- logger.info(f"Using BAGEL cinematography suggestion: {camera_config}")
168
- else:
169
- # Enhanced fallback with professional cinematography knowledge
170
- scene_type = _detect_scene_from_description(description_part.lower())
171
- camera_config = _get_enhanced_camera_config(scene_type, description_part.lower())
172
- logger.info(f"Using enhanced cinematography configuration for {scene_type}")
173
-
174
- # Add enhanced lighting with cinematography principles
175
- lighting_enhancement = _get_cinematography_lighting_enhancement(description_part.lower(), camera_config, scene_type)
176
-
177
- # Add style enhancement for multi-engine compatibility
178
- style_enhancement = _get_style_enhancement(scene_type, description_part.lower())
179
-
180
- # NEW: Smart keyword insertion with token economy
181
- smart_keywords = _apply_smart_keyword_insertion(description_part, camera_config, scene_type)
182
-
183
- # Build final prompt: Description + Camera + Lighting + Style + Smart Keywords
184
- final_prompt = description_part + camera_config + lighting_enhancement + style_enhancement + smart_keywords
185
-
186
- # NEW: Final length optimization with token economy
187
- if PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_optimization", {}).get("max_length"):
188
- final_prompt = _optimize_prompt_with_token_economy(final_prompt)
189
-
190
- # Clean up formatting
191
- final_prompt = _clean_prompt_formatting(final_prompt)
192
-
193
- return final_prompt
194
-
195
-
196
- def _extract_description_only(prompt: str) -> str:
197
- """Extract only the description part, removing camera setup sections"""
198
- # Remove CAMERA_SETUP section if present
199
- if "CAMERA_SETUP:" in prompt:
200
- parts = prompt.split("CAMERA_SETUP:")
201
- description = parts[0].strip()
202
- elif "2. CAMERA_SETUP" in prompt:
203
- parts = prompt.split("2. CAMERA_SETUP")
204
- description = parts[0].strip()
205
- else:
206
- description = prompt
207
-
208
- # Remove "DESCRIPTION:" label if present
209
- if description.startswith("DESCRIPTION:"):
210
- description = description.replace("DESCRIPTION:", "").strip()
211
- elif description.startswith("1. DESCRIPTION:"):
212
- description = description.replace("1. DESCRIPTION:", "").strip()
213
-
214
- # Clean up any remaining camera recommendations from the description
215
- description = re.sub(r'For this type of scene.*?shooting style would be.*?\.', '', description, flags=re.DOTALL)
216
- description = re.sub(r'I would recommend.*?aperture.*?\.', '', description, flags=re.DOTALL)
217
- description = re.sub(r'Professional Context:.*?\.', '', description, flags=re.DOTALL)
218
- description = re.sub(r'Cinematography context:.*?\.', '', description, flags=re.DOTALL)
219
-
220
- # Remove numbered section residues
221
- description = re.sub(r'\s*\d+\.\s*,?\s*$', '', description)
222
- description = re.sub(r'\s*\d+\.\s*,?\s*', ' ', description)
223
-
224
- return description.strip()
225
-
226
-
227
- def _detect_camera_angles(description: str) -> List[str]:
228
- """Detect camera angles and perspectives using professional cinematography knowledge"""
229
  try:
230
- angles_detected = []
231
- description_lower = description.lower()
232
-
233
- # Low angle (contrapicado) detection
234
- low_angle_indicators = [
235
- "looking up at", "from below", "upward angle", "towering", "looming",
236
- "shot from ground level", "worm's eye", "low angle"
237
- ]
238
- if any(indicator in description_lower for indicator in low_angle_indicators):
239
- angles_detected.append("low-angle shot")
240
-
241
- # High angle (picado) detection
242
- high_angle_indicators = [
243
- "looking down", "from above", "overhead", "bird's eye", "aerial view",
244
- "downward angle", "top-down", "high angle"
245
- ]
246
- if any(indicator in description_lower for indicator in high_angle_indicators):
247
- angles_detected.append("high-angle shot")
248
 
249
- # Eye level detection
250
- eye_level_indicators = [
251
- "eye level", "straight on", "direct view", "level with"
252
- ]
253
- if any(indicator in description_lower for indicator in eye_level_indicators):
254
- angles_detected.append("eye-level shot")
255
 
256
- # Dutch angle detection
257
- dutch_indicators = [
258
- "tilted", "angled", "diagonal", "off-kilter", "dutch angle"
259
- ]
260
- if any(indicator in description_lower for indicator in dutch_indicators):
261
- angles_detected.append("dutch angle")
262
 
263
- # Perspective analysis for mixed angles
264
- if ("foreground" in description_lower and "background" in description_lower):
265
- if ("close" in description_lower or "prominent" in description_lower) and "blurred" in description_lower:
266
- # Suggests foreground element shot from specific angle with background perspective
267
- if not angles_detected: # Only add if no specific angle detected
268
- angles_detected.append("shallow depth perspective")
269
 
270
- logger.info(f"Camera angles detected: {angles_detected}")
271
- return angles_detected
272
 
273
  except Exception as e:
274
- logger.warning(f"Camera angle detection failed: {e}")
275
- return []
276
 
277
 
278
- def _convert_to_cinematographic_language(description: str) -> str:
279
- """Convert descriptive analysis to cinematographic prompt language with angle detection"""
280
  try:
281
- # First detect camera angles
282
- camera_angles = _detect_camera_angles(description)
283
-
284
- generative = description
285
-
286
- # Remove descriptive introduction phrases
287
- descriptive_intros = [
288
- r'This image (?:features|shows|depicts|presents|displays)',
289
- r'The image (?:features|shows|depicts|presents|displays)',
290
- r'This (?:photograph|picture|scene|composition) (?:features|shows|depicts)',
291
- r'The (?:photograph|picture|scene|composition) (?:features|shows|depicts)',
292
- r'This is (?:a|an) (?:image|photograph|picture) (?:of|showing)',
293
- r'The setting (?:appears to be|is)',
294
- r'The scene (?:appears to be|is|shows)',
 
 
 
 
295
  ]
296
 
297
- for pattern in descriptive_intros:
298
- generative = re.sub(pattern, '', generative, flags=re.IGNORECASE)
299
-
300
- # Remove uncertainty and verbose connector phrases
301
- verbose_phrases = [
302
- r'possibly (?:a|an) ',
303
- r'appears to be (?:a|an) ',
304
- r'seems to be (?:a|an) ',
305
- r'might be (?:a|an) ',
306
- r'could be (?:a|an) ',
307
- r'suggests (?:a|an) ',
308
- r'indicating (?:a|an) ',
309
- r'(?:possibly|apparently|seemingly|likely)',
310
- r'which (?:is|are|creates|adds)',
311
- r'(?:In the background|In the foreground), (?:there are|there is)',
312
- r'(?:The background|The foreground) (?:features|shows|contains)',
313
- r'(?:There are|There is) [^,]+ (?:in the background|in the foreground)',
314
- r'The overall (?:setting|atmosphere|mood) (?:suggests|indicates)',
315
- ]
316
 
317
- for pattern in verbose_phrases:
318
- generative = re.sub(pattern, '', generative, flags=re.IGNORECASE)
319
-
320
- # Convert spatial relationships to cinematographic terms
321
- spatial_conversions = [
322
- # Background/foreground to cinematographic terms
323
- (r'prominently displayed in (?:the )?foreground', 'foreground focus'),
324
- (r'in (?:the )?foreground', 'foreground'),
325
- (r'in (?:the )?background', 'background'),
326
- (r'blurred (?:figures|people|objects)', 'bokeh blur'),
327
- (r'out of focus', 'soft focus'),
328
- # Convert descriptive structure to noun phrases
329
- (r'(?:close-up|medium shot|wide shot) of (?:a|an|the) ', r'close-up '),
330
- (r'(?:a|an|the) (\w+)', r'\1'),
331
- # Remove excessive connecting words
332
- (r'(?:, and|, with|, featuring)', ','),
333
- # Simplify location descriptions
334
- (r'on (?:a|an|the) ', r'on '),
335
- (r'in (?:a|an|the) ', r'in '),
336
- ]
337
 
338
- for pattern, replacement in spatial_conversions:
339
- generative = re.sub(pattern, replacement, generative, flags=re.IGNORECASE)
340
 
341
- # Convert action descriptions to present participles
342
- action_conversions = [
343
- (r'(\w+) (?:are|is) walking', r'\1 walking'),
344
- (r'(\w+) (?:are|is) standing', r'\1 standing'),
345
- (r'(\w+) (?:are|is) sitting', r'\1 sitting'),
346
- (r'people (?:are|is) out of focus', r'blurred people'),
347
- ]
348
 
349
- for pattern, replacement in action_conversions:
350
- generative = re.sub(pattern, replacement, generative, flags=re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
- # Add detected camera angles at the beginning
353
- if camera_angles:
354
- angle_prefix = ", ".join(camera_angles)
355
- generative = f"{angle_prefix}, {generative}"
356
 
357
  # Clean up extra spaces and punctuation
358
- generative = re.sub(r'\s+', ' ', generative)
359
- generative = re.sub(r'^\s*,\s*', '', generative) # Remove leading commas
360
- generative = re.sub(r'\s*,\s*,+', ',', generative) # Remove double commas
361
- generative = re.sub(r'\.+', '.', generative) # Remove multiple periods
362
-
363
- # Ensure it starts with a capital letter
364
- generative = generative.strip()
365
- if generative:
366
- generative = generative[0].upper() + generative[1:] if len(generative) > 1 else generative.upper()
367
 
368
- logger.info(f"Cinematographic conversion: angles={len(camera_angles)}, {len(description)} → {len(generative)} chars")
369
- return generative
370
 
371
  except Exception as e:
372
- logger.warning(f"Cinematographic language conversion failed: {e}")
373
- return description
374
 
375
 
376
- def _apply_smart_keyword_insertion(description: str, camera_config: str, scene_type: str) -> str:
377
- """Smart keyword insertion with token economy - avoid redundancy"""
378
  try:
379
- keywords = []
 
 
 
 
380
 
381
- # Token Economy Rule 1: If camera specs exist, skip "photorealistic" keywords
382
- has_camera_specs = bool(re.search(r'(?:Canon|Sony|Leica|ARRI|RED|Hasselblad|Phase One)', camera_config))
383
- has_lens_specs = bool(re.search(r'\d+mm.*f/[\d.]+', camera_config))
384
 
385
- # Only add quality keywords if NO technical specs present
386
- if not (has_camera_specs and has_lens_specs):
387
- quality_keywords = FLUX_RULES.get("mandatory_keywords", {}).get("quality", [])
388
- keywords.extend(quality_keywords[:2]) # Limit to 2 quality keywords max
389
- logger.info("Added fallback quality keywords (no camera specs detected)")
390
- else:
391
- logger.info("Skipped redundant quality keywords (camera specs present)")
 
 
 
 
 
 
 
 
 
 
 
392
 
393
- # Token Economy Rule 2: Scene-specific keywords only if they add value
394
- style_by_scene = FLUX_RULES.get("mandatory_keywords", {}).get("style_by_scene", {})
395
- if scene_type in style_by_scene:
396
- scene_keywords = style_by_scene[scene_type]
397
-
398
- # Check if scene keywords are already implied by camera config or description
399
- for keyword in scene_keywords:
400
- if keyword.lower() not in camera_config.lower() and keyword.lower() not in description.lower():
401
- keywords.append(keyword)
402
-
403
- # Token Economy Rule 3: Technical keywords only if not redundant
404
- technical_keywords = FLUX_RULES.get("mandatory_keywords", {}).get("technical", [])
405
- for tech_keyword in technical_keywords:
406
- # Skip "professional photography" if camera specs already indicate professional level
407
- if tech_keyword == "professional photography" and has_camera_specs:
408
- continue
409
- # Skip "high resolution" if camera specs include resolution indicators
410
- if tech_keyword == "high resolution" and has_camera_specs:
411
- continue
412
- keywords.append(tech_keyword)
413
-
414
- # Remove duplicates while preserving order
415
- unique_keywords = []
416
- for keyword in keywords:
417
- if keyword not in unique_keywords:
418
- unique_keywords.append(keyword)
419
-
420
- if unique_keywords:
421
- result = ", " + ", ".join(unique_keywords)
422
- logger.info(f"Smart keywords applied: {unique_keywords}")
423
- return result
424
  else:
425
- logger.info("No additional keywords needed (all redundant)")
426
- return ""
427
 
428
  except Exception as e:
429
- logger.warning(f"Smart keyword insertion failed: {e}")
430
- return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
- def _optimize_prompt_with_token_economy(prompt: str) -> str:
434
- """Optimize prompt length with intelligent token economy"""
 
435
  try:
436
- max_words = PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("prompt_optimization", {}).get("max_length", 150)
437
-
438
- words = prompt.split()
439
- if len(words) <= max_words:
440
- return prompt
441
-
442
- # Priority preservation order for token economy
443
- essential_patterns = [
444
- # 1. Camera angles (highest priority)
445
- r'(?:low-angle|high-angle|eye-level|dutch angle|bird\'s eye|worm\'s eye) shot',
446
- # 2. Camera and lens specs
447
- r'(?:Canon|Sony|Leica|ARRI|RED|Hasselblad|Phase One) [^,]+',
448
- r'\d+mm[^,]*f/[\d.]+[^,]*',
449
- r'ISO \d+',
450
- # 3. Core subject and composition
451
- r'(?:close-up|medium shot|wide shot|shallow depth)',
452
- r'(?:foreground|background|bokeh)',
453
- # 4. Scene-specific technical terms
454
- r'(?:cinematic|anamorphic|telephoto|wide-angle)',
455
- ]
456
 
457
- # Extract essential parts first
458
- essential_parts = []
459
- remaining_text = prompt
 
460
 
461
- for pattern in essential_patterns:
462
- matches = re.findall(pattern, remaining_text, re.IGNORECASE)
463
- for match in matches:
464
- if match not in essential_parts:
465
- essential_parts.append(match)
466
- # Remove from remaining text to avoid duplication
467
- remaining_text = re.sub(re.escape(match), '', remaining_text, count=1, flags=re.IGNORECASE)
468
 
469
- # Add essential parts to start
470
- optimized_words = []
471
- for part in essential_parts:
472
- optimized_words.extend(part.split())
473
 
474
- # Fill remaining space with most important remaining words
475
- remaining_words = [w for w in remaining_text.split() if w.strip() and w not in optimized_words]
476
- remaining_space = max_words - len(optimized_words)
477
 
478
- if remaining_space > 0:
479
- optimized_words.extend(remaining_words[:remaining_space])
480
 
481
- optimized = " ".join(optimized_words[:max_words])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
 
483
- logger.info(f"Token economy optimization: {len(words)} {len(optimized_words)} words, preserved {len(essential_parts)} essential elements")
 
 
484
 
485
- return optimized
486
 
487
  except Exception as e:
488
- logger.warning(f"Token economy optimization failed: {e}")
489
- return prompt
490
 
491
 
492
- def _detect_scene_from_description(description_lower: str) -> str:
493
- """Enhanced scene detection from description with cinematography knowledge"""
494
- scene_keywords = PROFESSIONAL_PHOTOGRAPHY_CONFIG.get("scene_detection_keywords", {})
495
-
496
- # Score each scene type
497
- scene_scores = {}
498
- for scene_type, keywords in scene_keywords.items():
499
- score = sum(1 for keyword in keywords if keyword in description_lower)
500
- if score > 0:
501
- scene_scores[scene_type] = score
502
-
503
- # Additional cinematography-specific detection
504
- if any(term in description_lower for term in ["film", "movie", "cinematic", "dramatic lighting", "anamorphic"]):
505
- scene_scores["cinematic"] = scene_scores.get("cinematic", 0) + 2
506
-
507
- if any(term in description_lower for term in ["studio", "controlled lighting", "professional portrait"]):
508
- scene_scores["portrait"] = scene_scores.get("portrait", 0) + 2
509
-
510
- # Return highest scoring scene type
511
- if scene_scores:
512
- return max(scene_scores.items(), key=lambda x: x[1])[0]
513
- else:
514
- return "default"
515
-
516
-
517
- def _format_professional_camera_suggestion(bagel_camera: str, scene_type: str) -> str:
518
- """Format BAGEL's camera suggestion with enhanced cinematography knowledge and fix formatting errors"""
519
  try:
520
- camera_text = bagel_camera.strip()
521
- camera_text = re.sub(r'^CAMERA_SETUP:\s*', '', camera_text)
522
-
523
- # Enhanced extraction patterns for cinema equipment
524
- cinema_patterns = {
525
- 'camera': r'(ARRI [^,]+|RED [^,]+|Canon EOS [^,]+|Sony A[^,]+|Leica [^,]+|Hasselblad [^,]+|Phase One [^,]+)',
526
- 'lens': r'(\d+mm[^,]*(?:anamorphic)?[^,]*)',
527
- 'aperture': r'(f/[\d.]+)'
528
- }
529
-
530
- extracted_parts = []
531
- camera_model = None
532
- lens_spec = None
533
- aperture_spec = None
534
-
535
- # Extract camera
536
- camera_match = re.search(cinema_patterns['camera'], camera_text, re.IGNORECASE)
537
- if camera_match:
538
- camera_model = camera_match.group(1).strip()
539
-
540
- # Extract lens
541
- lens_match = re.search(cinema_patterns['lens'], camera_text, re.IGNORECASE)
542
- if lens_match:
543
- lens_spec = lens_match.group(1).strip()
544
-
545
- # Extract aperture
546
- aperture_match = re.search(cinema_patterns['aperture'], camera_text, re.IGNORECASE)
547
- if aperture_match:
548
- aperture_spec = aperture_match.group(1).strip()
549
-
550
- # Build proper camera setup with all technical specs
551
- if camera_model and lens_spec:
552
- # Fix the "with, 35mm" error by proper formatting
553
- camera_setup = f"{camera_model}, {lens_spec}"
554
-
555
- # Add aperture if found
556
- if aperture_spec:
557
- if 'f/' not in lens_spec: # Don't duplicate aperture
558
- camera_setup += f" at {aperture_spec}"
559
-
560
- # Add ISO and composition based on scene type
561
- enhanced_config = _get_enhanced_camera_config(scene_type, "")
562
-
563
- # Extract ISO and composition from enhanced config
564
- iso_match = re.search(r'ISO \d+', enhanced_config)
565
- composition_match = re.search(r'(rule of thirds|leading lines|symmetrical|centered|hyperfocal distance)[^,]*', enhanced_config)
566
 
567
- if iso_match:
568
- camera_setup += f", {iso_match.group()}"
569
- if composition_match:
570
- camera_setup += f", {composition_match.group()}"
571
-
572
- # Scene-specific enhancement with token economy
573
- if scene_type == "cinematic":
574
- result = f", Shot on {camera_setup}" # Skip redundant "cinematic photography"
575
- elif scene_type == "portrait":
576
- result = f", Shot on {camera_setup}" # Skip redundant "professional portrait photography"
577
- else:
578
- result = f", Shot on {camera_setup}"
579
-
580
- logger.info(f"Formatted camera setup with token economy: {result}")
581
- return result
582
- else:
583
- # Fallback to enhanced config if parsing fails
584
- return _get_enhanced_camera_config(scene_type, camera_text.lower())
585
-
586
- except Exception as e:
587
- logger.warning(f"Failed to format professional camera suggestion: {e}")
588
- return _get_enhanced_camera_config(scene_type, "")
589
-
590
-
591
- def _get_enhanced_camera_config(scene_type: str, description_lower: str) -> str:
592
- """Get enhanced camera configuration with cinematography knowledge"""
593
- # Enhanced camera configurations with cinema equipment
594
- enhanced_configs = {
595
- "cinematic": ", Shot on ARRI Alexa LF, 35mm anamorphic lens at f/2.8, ISO 400",
596
- "portrait": ", Shot on Canon EOS R5, 85mm f/1.4 lens at f/2.8, ISO 200, rule of thirds",
597
- "landscape": ", Shot on Phase One XT, 24-70mm f/4 lens at f/8, ISO 100, hyperfocal distance",
598
- "street": ", Shot on Leica M11, 35mm f/1.4 lens at f/2.8, ISO 800",
599
- "architectural": ", Shot on Canon EOS R5, 24-70mm f/2.8 lens at f/8, ISO 100, symmetrical composition",
600
- "commercial": ", Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/4, ISO 100"
601
- }
602
-
603
- # Use enhanced config if available, otherwise fall back to FLUX_RULES
604
- if scene_type in enhanced_configs:
605
- return enhanced_configs[scene_type]
606
- elif scene_type in FLUX_RULES["camera_configs"]:
607
- return FLUX_RULES["camera_configs"][scene_type]
608
- else:
609
- return FLUX_RULES["camera_configs"]["default"]
610
-
611
-
612
- def _get_cinematography_lighting_enhancement(description_lower: str, camera_config: str, scene_type: str) -> str:
613
- """Enhanced lighting with cinematography principles"""
614
- # Don't add lighting if already mentioned
615
- if any(term in description_lower for term in ["lighting", "lit", "illuminated"]) or 'lighting' in camera_config.lower():
616
- return ""
617
-
618
- # Enhanced lighting based on scene type and cinematography knowledge
619
- if scene_type == "cinematic":
620
- if any(term in description_lower for term in ["dramatic", "moody", "dark"]):
621
- return ", dramatic lighting"
622
- else:
623
- return ", cinematic lighting"
624
- elif scene_type == "portrait":
625
- return ", studio lighting"
626
- elif "dramatic" in description_lower or "chaos" in description_lower:
627
- return ", dramatic lighting"
628
- else:
629
- return "" # Skip redundant lighting terms
630
-
631
-
632
- def _get_style_enhancement(scene_type: str, description_lower: str) -> str:
633
- """Get style enhancement for multi-engine compatibility with token economy"""
634
- # Token economy: only add style if it adds unique value
635
- if scene_type == "cinematic":
636
- if "film grain" not in description_lower:
637
- return ", film grain"
638
- elif scene_type == "architectural":
639
- return ", clean lines"
640
-
641
- return "" # Skip redundant style terms
642
-
643
-
644
- def _clean_prompt_formatting(prompt: str) -> str:
645
- """Clean up prompt formatting"""
646
- if not prompt:
647
- return ""
648
-
649
- # Ensure it starts with capital letter
650
- prompt = prompt.strip()
651
- if prompt:
652
- prompt = prompt[0].upper() + prompt[1:] if len(prompt) > 1 else prompt.upper()
653
-
654
- # Clean up spaces and commas
655
- prompt = re.sub(r'\s+', ' ', prompt)
656
- prompt = re.sub(r',\s*,+', ',', prompt)
657
- prompt = re.sub(r'^\s*,\s*', '', prompt) # Remove leading commas
658
- prompt = re.sub(r'\s*,\s*$', '', prompt) # Remove trailing commas
659
-
660
- # Remove redundant periods
661
- prompt = re.sub(r'\.+', '.', prompt)
662
-
663
- return prompt.strip()
664
 
665
 
666
  def calculate_prompt_score(prompt: str, analysis_data: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, int]]:
@@ -681,103 +456,75 @@ def calculate_prompt_score(prompt: str, analysis_data: Optional[Dict[str, Any]]
681
 
682
  # Enhanced Prompt Quality (0-25 points)
683
  length_score = min(15, len(prompt) // 10) # Reward appropriate length
684
- detail_score = min(10, len(prompt.split(',')) * 1.5) # Reward structured detail
685
  breakdown["prompt_quality"] = int(length_score + detail_score)
686
 
687
  # Technical Details with Cinematography Focus (0-25 points)
688
  tech_score = 0
689
 
690
  # Cinema equipment (higher scores for professional gear)
691
- cinema_equipment = ['ARRI', 'RED', 'Canon EOS R', 'Sony A1', 'Leica', 'Hasselblad', 'Phase One']
692
  for equipment in cinema_equipment:
693
  if equipment.lower() in prompt.lower():
694
- tech_score += 6
695
  break
696
 
697
  # Lens specifications
698
  if re.search(r'\d+mm.*f/[\d.]+', prompt):
699
- tech_score += 5
700
 
701
- # Camera angles (NEW - high value)
702
- angle_terms = ['low-angle shot', 'high-angle shot', 'eye-level shot', 'dutch angle', 'bird\'s eye', 'worm\'s eye']
703
- tech_score += sum(4 for term in angle_terms if term in prompt.lower())
704
-
705
- # Anamorphic and specialized lenses
706
- if 'anamorphic' in prompt.lower():
707
  tech_score += 4
708
 
709
  # Professional terminology
710
- tech_keywords = ['shot on', 'lens', 'cinematography', 'lighting']
711
- for keyword in tech_keywords:
712
- if keyword in prompt.lower():
713
- tech_score += 2
714
 
715
- # Bonus for BAGEL cinematography suggestions
716
- if analysis_data and analysis_data.get("has_camera_suggestion"):
717
- tech_score += 8
718
-
719
  breakdown["technical_details"] = min(25, tech_score)
720
 
721
- # Professional Cinematography (0-25 points) - Enhanced with angle detection
722
  cinema_score = 0
723
 
724
- # Camera angles (high value for professional cinematography)
725
- angle_terms = ['low-angle', 'high-angle', 'eye-level', 'dutch angle', 'bird\'s eye', 'worm\'s eye']
726
- cinema_score += sum(5 for term in angle_terms if term in prompt.lower())
727
-
728
  # Professional lighting techniques
729
- lighting_terms = ['cinematic lighting', 'dramatic lighting', 'studio lighting', 'rim light', 'practical lights']
730
- cinema_score += sum(3 for term in lighting_terms if term in prompt.lower())
731
 
732
  # Composition techniques
733
- composition_terms = ['composition', 'framing', 'depth of field', 'bokeh', 'rule of thirds', 'foreground', 'background']
734
- cinema_score += sum(2 for term in composition_terms if term in prompt.lower())
735
-
736
- # Cinematography style elements
737
- style_terms = ['film grain', 'anamorphic', 'telephoto compression', 'wide-angle', 'shallow depth']
738
- cinema_score += sum(3 for term in style_terms if term in prompt.lower())
739
 
740
  # Professional context bonus
741
- if analysis_data and analysis_data.get("cinematography_context_applied"):
742
- cinema_score += 5
743
 
744
  breakdown["professional_cinematography"] = min(25, cinema_score)
745
 
746
- # Multi-Engine Optimization (0-25 points) - Token economy aware
747
  optimization_score = 0
748
 
749
- # Check for technical specifications (more valuable than generic keywords)
750
- if re.search(r'(?:Canon|Sony|Leica|ARRI|RED|Hasselblad|Phase One)', prompt):
751
- optimization_score += 8 # Higher score for actual camera specs
752
 
 
753
  if re.search(r'\d+mm.*f/[\d.]+.*ISO \d+', prompt):
754
- optimization_score += 7 # Complete technical specs
755
-
756
- # Token economy bonus: penalize redundant keywords
757
- redundant_keywords = ['photorealistic', 'ultra-detailed', 'professional photography']
758
- has_camera_specs = bool(re.search(r'(?:Canon|Sony|Leica|ARRI|RED)', prompt))
759
 
760
- if has_camera_specs:
761
- # Bonus for NOT having redundant keywords when camera specs present
762
- redundant_count = sum(1 for keyword in redundant_keywords if keyword in prompt.lower())
763
- optimization_score += max(0, 5 - redundant_count * 2) # Penalty for redundancy
764
- else:
765
- # If no camera specs, quality keywords are valuable
766
- quality_keywords = sum(1 for keyword in redundant_keywords if keyword in prompt.lower())
767
- optimization_score += min(5, quality_keywords * 2)
768
 
769
- # Scene-specific optimization
770
- if any(style in prompt for style in FLUX_RULES.get("style_enhancements", {}).values()):
771
- optimization_score += 3
772
-
773
- # Length efficiency bonus
774
  word_count = len(prompt.split())
775
- if word_count <= 120: # Reward conciseness
776
- optimization_score += 2
 
 
777
 
778
  breakdown["multi_engine_optimization"] = min(25, optimization_score)
779
 
780
- # Calculate total with enhanced weighting
781
  total_score = sum(breakdown.values())
782
 
783
  return total_score, breakdown
@@ -794,7 +541,6 @@ def calculate_professional_enhanced_score(prompt: str, analysis_data: Optional[D
794
  Returns:
795
  Tuple of (total_score, breakdown_dict)
796
  """
797
- # Use the enhanced scoring system
798
  return calculate_prompt_score(prompt, analysis_data)
799
 
800
 
@@ -842,9 +588,9 @@ def format_analysis_report(analysis_data: Dict[str, Any], processing_time: float
842
  **Professional Context:** {'✅ Applied' if has_cinema_context else '❌ Not Applied'}
843
 
844
  **🎯 OPTIMIZATIONS APPLIED:**
845
- Camera angle detection
846
  ✅ Professional camera configuration
847
- Cinematography lighting setup
848
  ✅ Token economy optimization
849
  ✅ Multi-engine compatibility
850
  ✅ Redundancy elimination
 
142
  if not prompt or not isinstance(prompt, str):
143
  return ""
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  try:
146
+ # Step 1: Extract and clean the core description
147
+ core_description = _extract_clean_description(prompt)
148
+ if not core_description:
149
+ return "Professional photograph with technical excellence"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
+ # Step 2: Get camera configuration
152
+ camera_setup = _get_camera_setup(analysis_metadata, core_description)
 
 
 
 
153
 
154
+ # Step 3: Get essential style keywords
155
+ style_keywords = _get_essential_keywords(core_description, camera_setup, analysis_metadata)
 
 
 
 
156
 
157
+ # Step 4: Build final optimized prompt
158
+ final_prompt = _build_optimized_prompt(core_description, camera_setup, style_keywords)
 
 
 
 
159
 
160
+ logger.info(f"Prompt optimized: {len(prompt)} → {len(final_prompt)} chars")
161
+ return final_prompt
162
 
163
  except Exception as e:
164
+ logger.error(f"Prompt optimization failed: {e}")
165
+ return _create_fallback_prompt(prompt)
166
 
167
 
168
def _extract_clean_description(prompt: str) -> str:
    """Extract and clean the core description from BAGEL output.

    Drops everything from the CAMERA_SETUP section onward, removes the
    DESCRIPTION header and verbose lead-in phrases, condenses the wording via
    ``_convert_to_direct_language`` and caps the result at 200 characters.

    Args:
        prompt: Raw analysis text, optionally containing a
            ``DESCRIPTION:`` header and a ``CAMERA_SETUP`` section.

    Returns:
        The cleaned description (possibly empty). If cleaning raises, the
        first 100 characters of ``prompt`` are returned instead.
    """
    max_length = 200
    try:
        # Cut at the CAMERA_SETUP header. The optional numbered prefix is
        # consumed as part of the header so "2. CAMERA_SETUP:" does not leave
        # a dangling "2." at the end of the description.
        description = re.split(
            r'(?:\b\d+\.\s*)?CAMERA_SETUP\b:?', prompt, maxsplit=1
        )[0].strip()

        # Remove section headers
        description = re.sub(
            r'^(DESCRIPTION:|1\.\s*DESCRIPTION:)\s*', '', description,
            flags=re.IGNORECASE,
        )

        # Remove verbose introduction phrases (anchored at the start)
        intro_patterns = [
            r'^This image (?:features|shows|depicts|presents|captures)',
            r'^The image (?:features|shows|depicts|presents|captures)',
            r'^This (?:photograph|picture|scene) (?:features|shows|depicts)',
            r'^(?:In this image,?|Looking at this image,?)',
        ]
        for pattern in intro_patterns:
            description = re.sub(pattern, '', description, flags=re.IGNORECASE)

        # Convert to concise, direct language. This must run BEFORE the hedge
        # words are stripped: its rules look for whole phrases such as
        # "what appears to be a", which no longer exist once "appears to be"
        # has been removed.
        description = _convert_to_direct_language(description)

        # Strip remaining hedge words; word boundaries keep "impossibly" intact
        description = re.sub(
            r'\b(?:possibly|apparently|seemingly|appears to be|seems to be)\b',
            '', description, flags=re.IGNORECASE,
        )

        # Clean up formatting
        description = re.sub(r'\s+', ' ', description).strip()

        # Limit length for efficiency: keep the first sentence, then hard-cap.
        if len(description) > max_length:
            # Split only on punctuation followed by whitespace so decimals
            # such as "f/1.4" are not treated as sentence ends.
            first_sentence = re.split(r'(?<=[.!?])\s+', description, maxsplit=1)[0]
            description = first_sentence.rstrip(' .!?')
            if len(description) > max_length:
                # Still too long: truncate at the last word boundary.
                description = description[:max_length].rsplit(' ', 1)[0]

        return description.strip()

    except Exception as e:
        logger.warning(f"Description extraction failed: {e}")
        return prompt[:100] if prompt else ""
210
+
211
+
212
+ def _convert_to_direct_language(text: str) -> str:
213
+ """Convert verbose descriptive text to direct, concise language"""
214
+ try:
215
+ # Direct conversions for common verbose phrases
216
+ conversions = [
217
+ # Subject identification
218
+ (r'a (?:person|individual|figure|man|woman) (?:who is|that is)', r'person'),
219
+ (r' (?:who is|that is) (?:wearing|dressed in)', r' wearing'),
220
+ (r' (?:who appears to be|that appears to be)', r''),
221
+
222
+ # Location simplification
223
+ (r'(?:what appears to be|what seems to be) (?:a|an)', r''),
224
+ (r'in (?:what looks like|what appears to be) (?:a|an)', r'in'),
225
+ (r'(?:standing|sitting|positioned) in (?:the middle of|the center of)', r'in'),
226
+
227
+ # Action simplification
228
+ (r'(?:is|are) (?:currently|presently) (?:engaged in|performing)', r''),
229
+ (r'(?:can be seen|is visible|are visible)', r''),
230
+
231
+ # Background simplification
232
+ (r'(?:In the background|Behind (?:him|her|them)),? (?:there (?:is|are)|we can see)', r'Background:'),
233
+ (r'The background (?:features|shows|contains)', r'Background:'),
234
+
235
+ # Remove filler words
236
+ (r'\b(?:quite|rather|somewhat|fairly|very|extremely)\b', r''),
237
+ (r'\b(?:overall|generally|typically|usually)\b', r''),
238
+ ]
239
 
240
+ result = text
241
+ for pattern, replacement in conversions:
242
+ result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
 
243
 
244
  # Clean up extra spaces and punctuation
245
+ result = re.sub(r'\s+', ' ', result)
246
+ result = re.sub(r'\s*,\s*,+', ',', result)
247
+ result = re.sub(r'^\s*,\s*', '', result)
 
 
 
 
 
 
248
 
249
+ return result.strip()
 
250
 
251
  except Exception as e:
252
+ logger.warning(f"Language conversion failed: {e}")
253
+ return text
254
 
255
 
256
def _get_camera_setup(analysis_metadata: Optional[Dict[str, Any]], description: str) -> str:
    """Choose the camera setup string for a prompt.

    A camera suggestion carried in ``analysis_metadata`` is preferred when it
    is flagged via ``has_camera_suggestion`` and is longer than 10 characters;
    otherwise the setup is derived from the scene type detected in
    ``description``.

    Args:
        analysis_metadata: Optional analysis dict; the keys read here are
            ``has_camera_suggestion`` and ``camera_setup``.
        description: Cleaned scene description used for scene detection.

    Returns:
        The camera setup string, or ``"shot on professional camera"`` if
        anything raises along the way.
    """
    try:
        suggested = ""
        if analysis_metadata and analysis_metadata.get("has_camera_suggestion"):
            suggested = analysis_metadata.get("camera_setup", "")

        # Very short suggestions are treated as unusable.
        if suggested and len(suggested) > 10:
            return _format_camera_setup(suggested)

        # No usable suggestion: fall back to a per-scene default.
        return _get_scene_camera_setup(_detect_scene_from_content(description))

    except Exception as e:
        logger.warning(f"Camera setup detection failed: {e}")
        return "shot on professional camera"
272
+
273
+
274
+ def _format_camera_setup(raw_setup: str) -> str:
275
+ """Format camera setup into clean, concise format"""
276
+ try:
277
+ # Extract camera model
278
+ camera_patterns = [
279
+ r'(Canon EOS R\d+)',
280
+ r'(Sony A\d+[^\s,]*)',
281
+ r'(Leica [^\s,]+)',
282
+ r'(Phase One [^\s,]+)',
283
+ r'(Hasselblad [^\s,]+)',
284
+ r'(ARRI [^\s,]+)',
285
+ r'(RED [^\s,]+)'
286
+ ]
287
 
288
+ camera = None
289
+ for pattern in camera_patterns:
290
+ match = re.search(pattern, raw_setup, re.IGNORECASE)
291
+ if match:
292
+ camera = match.group(1)
293
+ break
294
+
295
+ # Extract lens info
296
+ lens_pattern = r'(\d+mm[^,]*f/[\d.]+[^,]*)'
297
+ lens_match = re.search(lens_pattern, raw_setup, re.IGNORECASE)
298
+ lens = lens_match.group(1) if lens_match else None
299
+
300
+ # Extract ISO
301
+ iso_pattern = r'(ISO \d+)'
302
+ iso_match = re.search(iso_pattern, raw_setup, re.IGNORECASE)
303
+ iso = iso_match.group(1) if iso_match else None
304
+
305
+ # Build clean setup
306
+ parts = []
307
+ if camera:
308
+ parts.append(camera)
309
+ if lens:
310
+ parts.append(lens)
311
+ if iso:
312
+ parts.append(iso)
313
+
314
+ if parts:
315
+ return f"shot on {', '.join(parts)}"
 
 
 
316
  else:
317
+ return "professional photography"
 
318
 
319
  except Exception as e:
320
+ logger.warning(f"Camera setup formatting failed: {e}")
321
+ return "professional photography"
322
+
323
+
324
+ def _detect_scene_from_content(description: str) -> str:
325
+ """Detect scene type from description content"""
326
+ description_lower = description.lower()
327
+
328
+ # Scene detection patterns
329
+ if any(term in description_lower for term in ["portrait", "person", "man", "woman", "face"]):
330
+ return "portrait"
331
+ elif any(term in description_lower for term in ["landscape", "mountain", "horizon", "nature", "outdoor"]):
332
+ return "landscape"
333
+ elif any(term in description_lower for term in ["street", "urban", "city", "building", "crowd"]):
334
+ return "street"
335
+ elif any(term in description_lower for term in ["architecture", "building", "structure", "interior"]):
336
+ return "architecture"
337
+ else:
338
+ return "general"
339
+
340
 
341
+ def _get_scene_camera_setup(scene_type: str) -> str:
342
+ """Get camera setup based on scene type"""
343
+ setups = {
344
+ "portrait": "shot on Canon EOS R5, 85mm f/1.4 lens, ISO 200",
345
+ "landscape": "shot on Phase One XT, 24-70mm f/4 lens, ISO 100",
346
+ "street": "shot on Leica M11, 35mm f/1.4 lens, ISO 800",
347
+ "architecture": "shot on Canon EOS R5, 24-70mm f/2.8 lens, ISO 100",
348
+ "general": "shot on Canon EOS R6, 50mm f/1.8 lens, ISO 400"
349
+ }
350
+
351
+ return setups.get(scene_type, setups["general"])
352
 
353
+
354
+ def _get_essential_keywords(description: str, camera_setup: str, analysis_metadata: Optional[Dict[str, Any]]) -> List[str]:
355
+ """Get essential style keywords without redundancy"""
356
  try:
357
+ keywords = []
358
+ description_lower = description.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
+ # Only add depth of field if not already mentioned
361
+ if "depth" not in description_lower and "bokeh" not in description_lower:
362
+ if any(term in camera_setup for term in ["f/1.4", "f/2.8", "85mm"]):
363
+ keywords.append("shallow depth of field")
364
 
365
+ # Add professional photography only if no specific camera mentioned
366
+ if "shot on" not in camera_setup:
367
+ keywords.append("professional photography")
 
 
 
 
368
 
369
+ # Scene-specific keywords
370
+ if "portrait" in description_lower and "studio lighting" not in description_lower:
371
+ keywords.append("professional portrait")
 
372
 
373
+ # Technical quality (only if needed)
374
+ if len(keywords) < 2:
375
+ keywords.append("high quality")
376
 
377
+ return keywords[:3] # Limit to 3 essential keywords
 
378
 
379
+ except Exception as e:
380
+ logger.warning(f"Keyword extraction failed: {e}")
381
+ return ["professional photography"]
382
+
383
+
384
+ def _build_optimized_prompt(description: str, camera_setup: str, keywords: List[str]) -> str:
385
+ """Build final optimized prompt with proper structure"""
386
+ try:
387
+ # Structure: Description + Technical + Style
388
+ parts = []
389
+
390
+ # Core description (clean and concise)
391
+ if description:
392
+ parts.append(description)
393
+
394
+ # Technical setup
395
+ if camera_setup:
396
+ parts.append(camera_setup)
397
+
398
+ # Essential keywords
399
+ if keywords:
400
+ parts.extend(keywords)
401
+
402
+ # Join with consistent separator
403
+ result = ", ".join(parts)
404
+
405
+ # Final cleanup
406
+ result = re.sub(r'\s*,\s*,+', ',', result) # Remove double commas
407
+ result = re.sub(r'\s+', ' ', result) # Clean spaces
408
+ result = result.strip().rstrip(',') # Remove trailing comma
409
 
410
+ # Ensure it starts with capital letter
411
+ if result:
412
+ result = result[0].upper() + result[1:] if len(result) > 1 else result.upper()
413
 
414
+ return result
415
 
416
  except Exception as e:
417
+ logger.error(f"Prompt building failed: {e}")
418
+ return "Professional photograph"
419
 
420
 
421
+ def _create_fallback_prompt(original_prompt: str) -> str:
422
+ """Create fallback prompt when optimization fails"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
  try:
424
+ # Extract first meaningful sentence
425
+ sentences = re.split(r'[.!?]', original_prompt)
426
+ if sentences:
427
+ clean_sentence = sentences[0].strip()
428
+ # Remove verbose starters
429
+ clean_sentence = re.sub(r'^(This image shows|The image depicts|This photograph)', '', clean_sentence, flags=re.IGNORECASE)
430
+ clean_sentence = clean_sentence.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
+ if len(clean_sentence) > 20:
433
+ return f"{clean_sentence}, professional photography"
434
+
435
+ return "Professional photograph with technical excellence"
436
+
437
+ except Exception:
438
+ return "Professional photograph"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
 
440
 
441
  def calculate_prompt_score(prompt: str, analysis_data: Optional[Dict[str, Any]] = None) -> Tuple[int, Dict[str, int]]:
 
456
 
457
  # Enhanced Prompt Quality (0-25 points)
458
  length_score = min(15, len(prompt) // 10) # Reward appropriate length
459
+ detail_score = min(10, len(prompt.split(',')) * 2) # Reward structured detail
460
  breakdown["prompt_quality"] = int(length_score + detail_score)
461
 
462
  # Technical Details with Cinematography Focus (0-25 points)
463
  tech_score = 0
464
 
465
  # Cinema equipment (higher scores for professional gear)
466
+ cinema_equipment = ['Canon EOS R', 'Sony A1', 'Leica', 'Hasselblad', 'Phase One', 'ARRI', 'RED']
467
  for equipment in cinema_equipment:
468
  if equipment.lower() in prompt.lower():
469
+ tech_score += 8
470
  break
471
 
472
  # Lens specifications
473
  if re.search(r'\d+mm.*f/[\d.]+', prompt):
474
+ tech_score += 6
475
 
476
+ # ISO settings
477
+ if re.search(r'ISO \d+', prompt):
 
 
 
 
478
  tech_score += 4
479
 
480
  # Professional terminology
481
+ tech_keywords = ['shot on', 'lens', 'depth of field', 'bokeh']
482
+ tech_score += sum(3 for keyword in tech_keywords if keyword in prompt.lower())
 
 
483
 
 
 
 
 
484
  breakdown["technical_details"] = min(25, tech_score)
485
 
486
+ # Professional Cinematography (0-25 points)
487
  cinema_score = 0
488
 
 
 
 
 
489
  # Professional lighting techniques
490
+ lighting_terms = ['professional lighting', 'studio lighting', 'natural lighting']
491
+ cinema_score += sum(4 for term in lighting_terms if term in prompt.lower())
492
 
493
  # Composition techniques
494
+ composition_terms = ['composition', 'depth of field', 'bokeh', 'shallow depth']
495
+ cinema_score += sum(3 for term in composition_terms if term in prompt.lower())
 
 
 
 
496
 
497
  # Professional context bonus
498
+ if analysis_data and analysis_data.get("has_camera_suggestion"):
499
+ cinema_score += 6
500
 
501
  breakdown["professional_cinematography"] = min(25, cinema_score)
502
 
503
+ # Multi-Engine Optimization (0-25 points)
504
  optimization_score = 0
505
 
506
+ # Check for technical specifications
507
+ if re.search(r'(?:Canon|Sony|Leica|Phase One)', prompt):
508
+ optimization_score += 10
509
 
510
+ # Complete technical specs
511
  if re.search(r'\d+mm.*f/[\d.]+.*ISO \d+', prompt):
512
+ optimization_score += 8
 
 
 
 
513
 
514
+ # Professional terminology
515
+ pro_terms = ['professional', 'shot on', 'high quality']
516
+ optimization_score += sum(2 for term in pro_terms if term in prompt.lower())
 
 
 
 
 
517
 
518
+ # Length efficiency bonus (reward conciseness)
 
 
 
 
519
  word_count = len(prompt.split())
520
+ if 30 <= word_count <= 60: # Optimal range
521
+ optimization_score += 5
522
+ elif word_count <= 30:
523
+ optimization_score += 3
524
 
525
  breakdown["multi_engine_optimization"] = min(25, optimization_score)
526
 
527
+ # Calculate total
528
  total_score = sum(breakdown.values())
529
 
530
  return total_score, breakdown
 
541
  Returns:
542
  Tuple of (total_score, breakdown_dict)
543
  """
 
544
  return calculate_prompt_score(prompt, analysis_data)
545
 
546
 
 
588
  **Professional Context:** {'✅ Applied' if has_cinema_context else '❌ Not Applied'}
589
 
590
  **🎯 OPTIMIZATIONS APPLIED:**
591
+ Clean description extraction
592
  ✅ Professional camera configuration
593
+ Essential keyword optimization
594
  ✅ Token economy optimization
595
  ✅ Multi-engine compatibility
596
  ✅ Redundancy elimination