Spaces:
Running
on
Zero
Running
on
Zero
File size: 15,609 Bytes
85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b 325e056 85f2f4b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 |
"""
Ultra Supreme Analyzer for image analysis and prompt building
VERSIÓN MEJORADA - Potencia CLIP en lugar de limitarlo
"""
import re
from typing import Dict, List, Any, Tuple
import logging
logger = logging.getLogger(__name__)
class UltraSupremeAnalyzer:
    """Build an enriched photography prompt from CLIP caption strings.

    The analyzer receives three caption strings (called "fast", "classic"
    and "best" by the methods below), removes known noise fragments,
    extracts a few keyword-based elements (subject, action, location, mood,
    special features, technical style) and assembles them, together with
    canned lighting and camera phrases, into one comma-separated prompt.

    All keyword detection is plain substring matching on lowercased text.
    """

    # Verbs/poses looked for by ``_extract_action`` (first hit wins).
    _ACTION_KEYWORDS = (
        'standing', 'sitting', 'walking', 'running', 'looking',
        'holding', 'wearing', 'posing', 'working', 'playing',
    )

    # Places/settings looked for by ``_extract_location``.
    _LOCATION_KEYWORDS = (
        'mountain', 'beach', 'forest', 'city', 'street', 'indoor',
        'outdoor', 'studio', 'nature', 'urban', 'field', 'desert',
        'ocean', 'lake', 'building', 'home', 'office',
    )

    # Atmosphere words looked for by ``_extract_mood`` (first hit wins).
    _MOOD_KEYWORDS = (
        'dramatic', 'peaceful', 'serene', 'intense', 'mysterious',
        'joyful', 'melancholic', 'powerful', 'ethereal', 'moody',
        'bright', 'dark', 'atmospheric', 'dreamy', 'dynamic',
    )

    # Raw-string regexes for distinctive phrases; ``[\w\s]+`` stops at commas.
    _SPECIAL_FEATURE_PATTERNS = (
        r'light shining on [\w\s]+',
        r'wearing [\w\s]+',
        r'with [\w\s]+ in the background',
        r'surrounded by [\w\s]+',
        r'[\w\s]+ lighting',
        r'[\w\s]+ atmosphere',
    )

    # A caption containing any of these is preserved verbatim as "essence".
    _ESSENCE_MARKERS = ('light shining', 'adventure gear', 'anthropological')

    # Closing style phrase per technical style; anything else gets the default.
    _STYLE_PHRASES = {
        'portrait': "masterful portrait photography",
        'landscape': "epic landscape photography",
        'dramatic': "cinematic photography with powerful visual impact",
        'documentary': "authentic documentary photography",
    }
    _DEFAULT_STYLE_PHRASE = "professional photography with exceptional detail"

    def __init__(self):
        # Noise fragments removed from captions, matched case-insensitively.
        self.cleanup_patterns = [
            r'arafed\s*',
            r'there is\s*',
            r'a photo of\s*',
            r'an image of\s*',
            r'a picture of\s*',
            r'inspired by [^,]+,?\s*',
            r'trending on [^,]+,?\s*',
            r'featured on [^,]+,?\s*',
            # Word boundaries keep "100km" from being chewed into "m".
            r'\b\d+k\b\s*',
            r'::\s*::\s*',
            r'contest winner,?\s*',
            r'award winning,?\s*',
        ]
        # Artist attributions ("by Some Name"). These are matched
        # case-sensitively on the original text: the capital letter is what
        # tells an attribution apart from ordinary phrases such as
        # "by the river", and ``\b`` prevents matching inside "baby ...".
        self.artist_patterns = [
            r'\bby [A-Z][^,]+,?\s*',
        ]
        # Keywords that vote for each technical style.
        self.technical_indicators = {
            'portrait': ['portrait', 'headshot', 'face', 'person', 'man', 'woman', 'child'],
            'landscape': ['mountain', 'landscape', 'nature', 'outdoor', 'field', 'forest'],
            'dramatic': ['dramatic', 'light shining', 'silhouette', 'backlit', 'atmospheric'],
            'professional': ['professional', 'studio', 'formal', 'business'],
            'artistic': ['artistic', 'creative', 'abstract', 'conceptual'],
            'documentary': ['documentary', 'candid', 'street', 'journalism', 'authentic'],
        }
        # Lighting phrase per context keyword (location or technical style).
        self.lighting_enhancements = {
            'outdoor': 'natural lighting with golden hour warmth',
            'mountain': 'dramatic alpine lighting with atmospheric haze',
            'portrait': 'professional portrait lighting with subtle rim light',
            'silhouette': 'dramatic backlighting creating ethereal silhouettes',
            'indoor': 'soft diffused window lighting with gentle shadows',
            'night': 'cinematic low-key lighting with strategic highlights',
            'default': 'masterful lighting that enhances depth and dimension',
        }
        # Camera phrase per technical style.
        self.camera_configs = {
            'portrait': 'Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/2.8',
            'landscape': 'Shot on Phase One XT, 40mm f/4 lens at f/8',
            'dramatic': 'Shot on Canon R5, 85mm f/1.2 lens at f/2',
            'street': 'Shot on Leica M11, 35mm f/1.4 lens at f/2.8',
            'default': 'Shot on Phase One XF IQ4, 80mm f/2.8 lens at f/4',
        }

    def clean_clip_description(self, description: str) -> str:
        """Return *description* lowercased with known noise fragments removed.

        Empty or ``None`` input yields an empty string.
        """
        if not description:
            return ""

        cleaned = description
        for pattern in self.cleanup_patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)
        # Must run before lowercasing: the pattern depends on the capital.
        for pattern in self.artist_patterns:
            cleaned = re.sub(pattern, '', cleaned)
        cleaned = cleaned.lower()

        # Collapse whitespace and tidy commas left behind by the removals.
        cleaned = re.sub(r'\s+', ' ', cleaned)
        cleaned = re.sub(r',\s*,+', ',', cleaned)
        cleaned = re.sub(r'^\s*,\s*', '', cleaned)
        cleaned = re.sub(r'\s*,\s*$', '', cleaned)
        return cleaned.strip()

    def extract_key_elements(self, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
        """Clean the three captions and extract the prompt elements.

        Returns a dict with the keys ``main_subject``, ``action``,
        ``location``, ``mood``, ``special_features``, ``technical_style``
        and ``original_essence``.
        """
        fast_clean = self.clean_clip_description(clip_fast)
        classic_clean = self.clean_clip_description(clip_classic)
        best_clean = self.clean_clip_description(clip_best)

        # Keyword extraction runs over the three captions joined by spaces.
        combined = f"{fast_clean} {classic_clean} {best_clean}"

        return {
            'main_subject': self._extract_main_subject(combined),
            'action': self._extract_action(combined),
            'location': self._extract_location(combined),
            'mood': self._extract_mood(combined),
            'special_features': self._extract_special_features(combined),
            'technical_style': self._determine_technical_style(combined),
            'original_essence': self._preserve_unique_elements(fast_clean, classic_clean, best_clean),
        }

    def _extract_main_subject(self, description: str) -> str:
        """Return the first subject phrase found, or a fallback.

        Pattern groups are tried in priority order (people, portrait-like
        nouns, scenery, animals). Without a match the first three words are
        used, and ``"figure"`` when there are fewer than three words.

        NOTE(review): the optional modifier group is greedy, so the match
        can span many words up to the last noun before a comma.
        """
        subject_patterns = [
            r'(a |an )?([\w\s]+ )?(man|woman|person|child|boy|girl|people|group)',
            r'(a |an )?([\w\s]+ )?(portrait|face|figure)',
            r'(a |an )?([\w\s]+ )?(landscape|mountain|building|structure)',
            r'(a |an )?([\w\s]+ )?(animal|dog|cat|bird)',
        ]
        for pattern in subject_patterns:
            match = re.search(pattern, description)
            if match:
                return match.group(0).strip()

        words = description.split()
        if len(words) > 2:
            return ' '.join(words[:3])
        return "figure"

    def _extract_action(self, description: str) -> str:
        """Return the first action keyword with up to one word before and two after."""
        for keyword in self._ACTION_KEYWORDS:
            if keyword not in description:
                continue
            pattern = rf'\b\w*\s*{re.escape(keyword)}\s*\w*\s*\w*'
            match = re.search(pattern, description)
            if match:
                return match.group(0).strip()
        return ""

    def _extract_location(self, description: str) -> str:
        """Return up to two location keywords found, space-separated."""
        found = [kw for kw in self._LOCATION_KEYWORDS if kw in description]
        return ' '.join(found[:2])

    def _extract_mood(self, description: str) -> str:
        """Return the first mood keyword present, or an empty string."""
        return next((kw for kw in self._MOOD_KEYWORDS if kw in description), "")

    def _extract_special_features(self, description: str) -> List[str]:
        """Return at most three distinctive phrases found in *description*."""
        features = []
        for pattern in self._SPECIAL_FEATURE_PATTERNS:
            # ``[\w\s]+`` may start on the space after a comma; strip it.
            features.extend(m.strip() for m in re.findall(pattern, description))
        return features[:3]

    def _determine_technical_style(self, description: str) -> str:
        """Return the style whose keywords occur most often, else ``'default'``.

        Ties go to the style listed first in ``technical_indicators``.
        """
        scores = {
            style: sum(1 for kw in keywords if kw in description)
            for style, keywords in self.technical_indicators.items()
        }
        best = max(scores, key=scores.get, default=None)
        if best is None or scores[best] == 0:
            return 'default'
        return best

    def _preserve_unique_elements(self, fast: str, classic: str, best: str) -> str:
        """Return the first caption that contains an essence marker phrase.

        Captions are checked in the order fast, classic, best. An empty
        string is returned when none contains a marker.
        """
        for desc in (fast, classic, best):
            if any(marker in desc for marker in self._ESSENCE_MARKERS):
                return desc
        return ""

    def _select_lighting(self, elements: Dict[str, Any]) -> str:
        """Pick a lighting phrase from the location words, then the style.

        ``location`` may hold two space-separated keywords, so each word is
        tried on its own before falling back to the technical style and
        finally to the default phrase.
        """
        candidates = (elements.get('location') or '').split()
        candidates.append(elements.get('technical_style') or '')
        for key in candidates:
            lighting = self.lighting_enhancements.get(key)
            if lighting:
                return lighting
        return self.lighting_enhancements['default']

    def _build_prompt_from_elements(self, elements: Dict[str, Any]) -> str:
        """Assemble the final prompt string from an elements dict."""
        components = []

        # 1. Subject. The extracted subject may already carry an article
        #    ("a man"); drop it so the prompt does not start with "An a man".
        subject = re.sub(
            r'^(?:an?|the)\s+', '',
            (elements.get('main_subject') or '').strip(),
            flags=re.IGNORECASE,
        )
        if not subject:
            subject = "figure"
        article = "An" if subject[0].lower() in 'aeiou' else "A"
        components.append(f"{article} {subject}")

        # 2. Action, when one was detected.
        if elements.get('action'):
            components.append(elements['action'])

        # 3. Up to two special features.
        components.extend((elements.get('special_features') or [])[:2])

        # 4. Location / setting.
        location = elements.get('location')
        if location:
            if 'mountain' in location:
                components.append("on a majestic mountain peak")
            elif 'outdoor' in location or 'nature' in location:
                components.append("in a breathtaking natural setting")
            else:
                components.append(f"in {location}")

        # 5. Mood / atmosphere.
        if elements.get('mood'):
            components.append(f"capturing a {elements['mood']} atmosphere")

        # 6. Context-based lighting.
        components.append(f"illuminated with {self._select_lighting(elements)}")

        # 7. Camera setup and 8. closing style phrase.
        style = elements.get('technical_style')
        components.append(self.camera_configs.get(style, self.camera_configs['default']))
        components.append(self._STYLE_PHRASES.get(style, self._DEFAULT_STYLE_PHRASE))

        # 9. The preserved essence is only logged, not added to the prompt.
        essence = elements.get('original_essence')
        if essence and len(essence) > 10:
            logger.info("Preservando esencia única: %s", essence)

        prompt = ", ".join(components)

        # Final tidy-up of whitespace and comma spacing.
        prompt = re.sub(r'\s+', ' ', prompt)
        prompt = re.sub(r',\s*,+', ',', prompt)
        prompt = re.sub(r'\s*,\s*', ', ', prompt)

        if prompt:
            prompt = prompt[0].upper() + prompt[1:]
        logger.info("Prompt generado: %s", prompt)
        return prompt

    def ultra_supreme_analysis(self, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
        """Analyse three CLIP captions and return the analysis dict.

        The result holds the full ``elements`` dict plus top-level copies of
        ``technical_style``, ``unique_features``, ``preserved_essence``,
        ``mood`` and ``location``.
        """
        logger.info("Iniciando análisis MEJORADO que potencia CLIP")
        elements = self.extract_key_elements(clip_fast, clip_classic, clip_best)
        return {
            "elements": elements,
            "technical_style": elements['technical_style'],
            "unique_features": elements['special_features'],
            "preserved_essence": elements['original_essence'],
            "mood": elements['mood'],
            "location": elements['location'],
        }

    def build_ultra_supreme_prompt(self, ultra_analysis: Dict[str, Any], clip_results: List[str]) -> str:
        """Build the prompt from an analysis result.

        Args:
            ultra_analysis: Either the dict returned by
                ``ultra_supreme_analysis`` (elements under ``'elements'``)
                or a bare elements dict.
            clip_results: Accepted for interface compatibility; not used.

        Returns:
            The assembled prompt string.
        """
        elements = ultra_analysis.get('elements', ultra_analysis)
        return self._build_prompt_from_elements(elements)

    def calculate_ultra_supreme_score(self, prompt: str, ultra_analysis: Dict[str, Any]) -> Tuple[int, Dict[str, int]]:
        """Score *prompt* from 0 to 100 and return ``(score, breakdown)``.

        Breakdown keys and maximum points: ``structure`` (20), ``unique``
        (30), ``technical`` (20), ``mood`` (15), ``descriptive`` (15).
        """
        prompt = prompt or ""
        breakdown = {}

        # Structure: starts with an article and has at least five commas.
        structure = 0
        if prompt.startswith(("A ", "An ")):
            structure += 10
        if prompt.count(",") >= 5:
            structure += 10
        breakdown["structure"] = structure

        # Unique features: ten points each, capped at thirty.
        features = ultra_analysis.get('unique_features') or []
        breakdown["unique"] = min(len(features) * 10, 30)

        # Technical context: camera body and lens details present.
        technical = 0
        if "Shot on" in prompt:
            technical += 10
        if any(term in prompt for term in ("f/", "mm")):
            technical += 10
        breakdown["technical"] = technical

        # Mood: awarded when the analysis detected one.
        breakdown["mood"] = 15 if ultra_analysis.get('mood') else 0

        # Descriptive quality: length plus presence of quality adjectives.
        descriptive = 0
        if len(prompt) > 100:
            descriptive += 10
        if any(term in prompt for term in ("masterful", "epic", "cinematic", "exceptional")):
            descriptive += 5
        breakdown["descriptive"] = descriptive

        return min(sum(breakdown.values()), 100), breakdown