Spaces:
Running
on
Zero
Running
on
Zero
"""
Ultra Supreme Analyzer for image analysis and prompt building.

IMPROVED VERSION - amplifies CLIP's output instead of constraining it.
"""
import re | |
from typing import Dict, List, Any, Tuple | |
import logging | |
logger = logging.getLogger(__name__)


class UltraSupremeAnalyzer:
    """Turn CLIP caption strings into an enriched photography prompt.

    The analyzer cleans up to three CLIP descriptions of the same image,
    extracts a subject, action, location, mood and notable features by
    keyword/regex matching, and assembles a comma-separated prompt with
    lighting and camera boilerplate chosen from the detected style.
    """

    def __init__(self):
        # Noise fragments removed from CLIP descriptions. They are applied in
        # order, case-insensitively, to already lower-cased text.
        # NOTE(review): because the text is lower-cased and IGNORECASE is set,
        # `[A-Z]` in the "by ..." pattern matches any letter, so phrases such
        # as "standing by the lake" are stripped too - confirm this is wanted.
        self.cleanup_patterns = [
            r'arafed\s*',
            r'there is\s*',
            r'a photo of\s*',
            r'an image of\s*',
            r'a picture of\s*',
            r'inspired by [^,]+,?\s*',
            # `\b` keeps the pattern from firing inside words like "baby".
            r'\bby [A-Z][^,]+,?\s*',
            r'trending on [^,]+,?\s*',
            r'featured on [^,]+,?\s*',
            r'\d+k\s*',
            r'::\s*::\s*',
            r'contest winner,?\s*',
            r'award winning,?\s*',
        ]
        # Keywords whose presence votes for a technical style.
        self.technical_indicators = {
            'portrait': ['portrait', 'headshot', 'face', 'person', 'man', 'woman', 'child'],
            'landscape': ['mountain', 'landscape', 'nature', 'outdoor', 'field', 'forest'],
            'dramatic': ['dramatic', 'light shining', 'silhouette', 'backlit', 'atmospheric'],
            'professional': ['professional', 'studio', 'formal', 'business'],
            'artistic': ['artistic', 'creative', 'abstract', 'conceptual'],
            'documentary': ['documentary', 'candid', 'street', 'journalism', 'authentic'],
        }
        # Lighting phrase keyed by a location keyword or technical style.
        self.lighting_enhancements = {
            'outdoor': 'natural lighting with golden hour warmth',
            'mountain': 'dramatic alpine lighting with atmospheric haze',
            'portrait': 'professional portrait lighting with subtle rim light',
            'silhouette': 'dramatic backlighting creating ethereal silhouettes',
            'indoor': 'soft diffused window lighting with gentle shadows',
            'night': 'cinematic low-key lighting with strategic highlights',
            'default': 'masterful lighting that enhances depth and dimension',
        }
        # Camera boilerplate keyed by technical style.
        # NOTE(review): `technical_indicators` defines no 'street' style, so
        # the 'street' entry is not reachable from this class as written.
        self.camera_configs = {
            'portrait': 'Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/2.8',
            'landscape': 'Shot on Phase One XT, 40mm f/4 lens at f/8',
            'dramatic': 'Shot on Canon R5, 85mm f/1.2 lens at f/2',
            'street': 'Shot on Leica M11, 35mm f/1.4 lens at f/2.8',
            'default': 'Shot on Phase One XF IQ4, 80mm f/2.8 lens at f/4',
        }

    def clean_clip_description(self, description: str) -> str:
        """Lower-case a CLIP description and strip known noise fragments.

        Args:
            description: Raw caption text; ``None`` or ``""`` is accepted.

        Returns:
            The cleaned caption with collapsed whitespace and no leading,
            trailing or doubled commas. Empty string for empty input.
        """
        if not description:
            return ""
        cleaned = description.lower()
        for pattern in self.cleanup_patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
        # Removing fragments leaves stray whitespace and commas behind.
        cleaned = re.sub(r'\s+', ' ', cleaned)
        cleaned = re.sub(r',\s*,+', ',', cleaned)
        cleaned = re.sub(r'^\s*,\s*', '', cleaned)
        cleaned = re.sub(r'\s*,\s*$', '', cleaned)
        return cleaned.strip()

    def extract_key_elements(self, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
        """Extract prompt ingredients from three CLIP descriptions.

        Each description is cleaned, then the three are joined with spaces
        and searched as a single text.

        Returns:
            A dict with keys ``main_subject``, ``action``, ``location``,
            ``mood``, ``special_features``, ``technical_style`` and
            ``original_essence``.
        """
        fast_clean = self.clean_clip_description(clip_fast)
        classic_clean = self.clean_clip_description(clip_classic)
        best_clean = self.clean_clip_description(clip_best)
        combined = f"{fast_clean} {classic_clean} {best_clean}"
        return {
            'main_subject': self._extract_main_subject(combined),
            'action': self._extract_action(combined),
            'location': self._extract_location(combined),
            'mood': self._extract_mood(combined),
            'special_features': self._extract_special_features(combined),
            'technical_style': self._determine_technical_style(combined),
            'original_essence': self._preserve_unique_elements(fast_clean, classic_clean, best_clean),
        }

    def _extract_main_subject(self, description: str) -> str:
        """Return the first subject phrase found, or a fallback.

        Pattern groups are tried in priority order (people, portrait-like,
        scenery, animals). Without a match the first three words are used
        when the text has more than two words; otherwise ``"figure"``.
        """
        # Word boundaries stop "man" from matching inside e.g. "roman";
        # the optional "s" keeps simple plurals ("dogs") matching.
        subject_patterns = [
            r'\b(?:a |an )?(?:[\w\s]+ )?(?:man|woman|person|child|boy|girl|people|group)s?\b',
            r'\b(?:a |an )?(?:[\w\s]+ )?(?:portrait|face|figure)s?\b',
            r'\b(?:a |an )?(?:[\w\s]+ )?(?:landscape|mountain|building|structure)s?\b',
            r'\b(?:a |an )?(?:[\w\s]+ )?(?:animal|dog|cat|bird)s?\b',
        ]
        for pattern in subject_patterns:
            match = re.search(pattern, description)
            if match:
                return match.group(0).strip()
        words = description.split()
        if len(words) > 2:
            return ' '.join(words[:3])
        return "figure"

    def _extract_action(self, description: str) -> str:
        """Return the first action keyword with its neighbouring words.

        Keywords are tried in list order, not text order. The match spans up
        to one word before and two words after the keyword. Returns ``""``
        when no keyword is present.
        """
        action_keywords = ['standing', 'sitting', 'walking', 'running', 'looking',
                           'holding', 'wearing', 'posing', 'working', 'playing']
        for keyword in action_keywords:
            if keyword not in description:
                continue
            match = re.search(rf'\b\w*\s*{keyword}\s*\w*\s*\w*', description)
            if match:
                return match.group(0).strip()
        return ""

    def _extract_location(self, description: str) -> str:
        """Return up to two location keywords found in the text, space-joined.

        Matching is by substring, in the order of the keyword list.
        """
        location_keywords = ['mountain', 'beach', 'forest', 'city', 'street', 'indoor',
                             'outdoor', 'studio', 'nature', 'urban', 'field', 'desert',
                             'ocean', 'lake', 'building', 'home', 'office']
        found = [keyword for keyword in location_keywords if keyword in description]
        return ' '.join(found[:2])

    def _extract_mood(self, description: str) -> str:
        """Return the first mood keyword (list order) contained in the text, or ``""``."""
        mood_keywords = ['dramatic', 'peaceful', 'serene', 'intense', 'mysterious',
                         'joyful', 'melancholic', 'powerful', 'ethereal', 'moody',
                         'bright', 'dark', 'atmospheric', 'dreamy', 'dynamic']
        return next((keyword for keyword in mood_keywords if keyword in description), "")

    def _extract_special_features(self, description: str) -> List[str]:
        """Return at most three distinctive phrases matched in the text."""
        # Raw strings: `\w` / `\s` in a normal string literal are invalid
        # escape sequences.
        special_patterns = [
            r'light shining on [\w\s]+',
            r'wearing [\w\s]+',
            r'with [\w\s]+ in the background',
            r'surrounded by [\w\s]+',
            r'[\w\s]+ lighting',
            r'[\w\s]+ atmosphere',
        ]
        features: List[str] = []
        for pattern in special_patterns:
            features.extend(re.findall(pattern, description))
        return features[:3]

    def _determine_technical_style(self, description: str) -> str:
        """Return the style whose keywords occur most often in the text.

        Each keyword contained in the text (substring match) counts once.
        Ties go to the style listed first in ``technical_indicators``;
        ``'default'`` is returned when nothing matches.
        """
        style_scores = {}
        for style, keywords in self.technical_indicators.items():
            hits = sum(1 for keyword in keywords if keyword in description)
            if hits > 0:
                style_scores[style] = hits
        if not style_scores:
            return 'default'
        return max(style_scores, key=style_scores.get)

    def _preserve_unique_elements(self, fast: str, classic: str, best: str) -> str:
        """Return the first description containing a marker phrase, else ``""``.

        Descriptions are checked in the order fast, classic, best.
        """
        marker_phrases = ('light shining', 'adventure gear', 'anthropological')
        for desc in (fast, classic, best):
            if any(marker in desc for marker in marker_phrases):
                return desc
        return ""

    def _select_lighting(self, location: str, technical_style: str) -> str:
        """Pick a lighting phrase for the given location and style.

        The full location string is tried first, then each of its words
        (``_extract_location`` may join two keywords), then the technical
        style, and finally the ``'default'`` entry.
        """
        location = location or ''
        for key in [location, *location.split(), technical_style]:
            if key in self.lighting_enhancements:
                return self.lighting_enhancements[key]
        return self.lighting_enhancements['default']

    def _build_prompt_from_elements(self, elements: Dict[str, Any]) -> str:
        """Assemble the final prompt from an elements dict.

        Args:
            elements: Dict shaped like the result of ``extract_key_elements``.

        Returns:
            A comma-separated prompt starting with "A"/"An" plus the subject.
        """
        components = []

        # 1. Subject. The extracted subject may already carry an article
        #    ("a woman"); drop it so the output is not "An a woman".
        subject = re.sub(r'^(?:an?|the)\s+', '', (elements.get('main_subject') or '').strip(),
                         flags=re.IGNORECASE)
        if subject:
            article = "An" if subject[0].lower() in 'aeiou' else "A"
            components.append(f"{article} {subject}")
        else:
            components.append("A figure")

        # 2. Action, when one was detected.
        action = elements.get('action')
        if action:
            components.append(action)

        # 3. Up to two special features.
        components.extend((elements.get('special_features') or [])[:2])

        # 4. Location / setting.
        location = elements.get('location') or ''
        if location:
            if 'mountain' in location:
                components.append("on a majestic mountain peak")
            elif 'outdoor' in location or 'nature' in location:
                components.append("in a breathtaking natural setting")
            else:
                components.append(f"in {location}")

        # 5. Mood.
        mood = elements.get('mood')
        if mood:
            components.append(f"capturing a {mood} atmosphere")

        # 6. Lighting chosen from location, then style.
        style = elements.get('technical_style') or 'default'
        components.append(f"illuminated with {self._select_lighting(location, style)}")

        # 7. Camera boilerplate for the style.
        components.append(self.camera_configs.get(style, self.camera_configs['default']))

        # 8. Closing photographic-style phrase.
        style_phrases = {
            'portrait': "masterful portrait photography",
            'landscape': "epic landscape photography",
            'dramatic': "cinematic photography with powerful visual impact",
            'documentary': "authentic documentary photography",
        }
        components.append(style_phrases.get(style, "professional photography with exceptional detail"))

        # 9. The preserved essence is only logged, not added to the prompt.
        essence = elements.get('original_essence')
        if essence and len(essence) > 10:
            logger.info("Preservando esencia única: %s", essence)

        prompt = ", ".join(components)
        # Normalise whitespace and comma spacing.
        prompt = re.sub(r'\s+', ' ', prompt)
        prompt = re.sub(r',\s*,+', ',', prompt)
        prompt = re.sub(r'\s*,\s*', ', ', prompt)
        if prompt:
            prompt = prompt[0].upper() + prompt[1:]
        logger.info("Prompt generado: %s", prompt)
        return prompt

    def ultra_supreme_analysis(self, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
        """Analyse three CLIP descriptions and return a summary dict.

        Returns:
            A dict with the full ``elements`` dict plus its
            ``technical_style``, ``unique_features`` (the special features),
            ``preserved_essence``, ``mood`` and ``location`` at top level.
        """
        logger.info("Iniciando análisis MEJORADO que potencia CLIP")
        elements = self.extract_key_elements(clip_fast, clip_classic, clip_best)
        return {
            "elements": elements,
            "technical_style": elements['technical_style'],
            "unique_features": elements['special_features'],
            "preserved_essence": elements['original_essence'],
            "mood": elements['mood'],
            "location": elements['location'],
        }

    def build_ultra_supreme_prompt(self, ultra_analysis: Dict[str, Any], clip_results: List[str]) -> str:
        """Build the prompt for an analysis result.

        Args:
            ultra_analysis: Either the dict returned by
                ``ultra_supreme_analysis`` (its ``'elements'`` entry is used)
                or an elements dict from ``extract_key_elements``.
            clip_results: Accepted for call compatibility; not used.

        Returns:
            The assembled prompt string.
        """
        # This method used to be defined twice under the same name; the later
        # definition shadowed the real builder and recursed into itself,
        # raising KeyError('elements'). The builder is now a private helper.
        elements = ultra_analysis.get('elements', ultra_analysis)
        return self._build_prompt_from_elements(elements)

    def calculate_ultra_supreme_score(self, prompt: str, ultra_analysis: Dict[str, Any]) -> Tuple[int, Dict[str, int]]:
        """Score a generated prompt by structure and richness.

        Returns:
            ``(total, breakdown)`` where ``total`` is capped at 100 and
            ``breakdown`` maps ``structure`` (max 20), ``unique`` (max 30),
            ``technical`` (max 20), ``mood`` (max 15) and ``descriptive``
            (max 15) to their points.
        """
        breakdown: Dict[str, int] = {}

        # Structure: leading article and at least five commas.
        structure_score = 0
        if prompt.startswith(("A ", "An ")):
            structure_score += 10
        if prompt.count(",") >= 5:
            structure_score += 10
        breakdown["structure"] = structure_score

        # Unique features: 10 points each, capped at 30.
        unique_features = ultra_analysis.get('unique_features') or []
        breakdown["unique"] = min(len(unique_features) * 10, 30)

        # Technical context: camera body and lens details.
        tech_score = 0
        if "Shot on" in prompt:
            tech_score += 10
        if any(term in prompt for term in ["f/", "mm"]):
            tech_score += 10
        breakdown["technical"] = tech_score

        # Mood detected during analysis.
        breakdown["mood"] = 15 if ultra_analysis.get('mood') else 0

        # Descriptive quality: length and quality adjectives.
        desc_score = 0
        if len(prompt) > 100:
            desc_score += 10
        if any(term in prompt for term in ["masterful", "epic", "cinematic", "exceptional"]):
            desc_score += 5
        breakdown["descriptive"] = desc_score

        return min(sum(breakdown.values()), 100), breakdown