Malaji71 committed on
Commit
3d9a188
·
verified ·
1 Parent(s): 3e277be

Update optimizer.py

Browse files
Files changed (1) hide show
  1. optimizer.py +132 -61
optimizer.py CHANGED
@@ -30,9 +30,8 @@ class UltraSupremeOptimizer:
30
  self.usage_count = 0
31
  self.device = self._get_device()
32
  self.is_initialized = False
33
- # Inicializar modelo inmediatamente
34
- self.initialize_model()
35
-
36
  @staticmethod
37
  def _get_device() -> str:
38
  """Determine the best available device for computation"""
@@ -49,13 +48,13 @@ class UltraSupremeOptimizer:
49
  return True
50
 
51
  try:
52
- # Configuración estándar sin forzar precisión
53
  config = Config(
54
  clip_model_name="ViT-L-14/openai",
55
  download_cache=True,
56
  chunk_size=2048,
57
  quiet=True,
58
- device="cpu" # Inicializar en CPU
59
  )
60
 
61
  self.interrogator = Interrogator(config)
@@ -63,7 +62,8 @@ class UltraSupremeOptimizer:
63
 
64
  # Clean up memory after initialization
65
  gc.collect()
66
-
 
67
  return True
68
 
69
  except Exception as e:
@@ -86,8 +86,8 @@ class UltraSupremeOptimizer:
86
  if image.mode != 'RGB':
87
  image = image.convert('RGB')
88
 
89
- # Resize if too large - usar tamaño generoso para máxima calidad
90
- max_size = 1024 if self.device != "cpu" else 768
91
  if image.size[0] > max_size or image.size[1] > max_size:
92
  image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
93
 
@@ -109,7 +109,7 @@ class UltraSupremeOptimizer:
109
  r',\s*artstation',
110
  r',\s*concept art',
111
  r',\s*digital art',
112
- r',\s*by greg rutkowski', # Remover artistas genéricos overused
113
  ]
114
 
115
  cleaned_prompt = base_prompt
@@ -148,25 +148,101 @@ class UltraSupremeOptimizer:
148
 
149
  return final_prompt
150
 
151
- @spaces.GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  def run_clip_inference(self, image: Image.Image) -> Tuple[str, str, str]:
153
  """Solo la inferencia CLIP usa GPU"""
154
  try:
155
- # Mover modelos a GPU sin forzar precisión
156
- if self.device == "cuda":
157
- # Configurar el dispositivo en el interrogator
158
- self.interrogator.config.device = "cuda"
 
 
 
 
 
 
 
 
 
159
 
160
- # Mover modelos a GPU manteniendo su precisión nativa
161
- if hasattr(self.interrogator, 'clip_model') and self.interrogator.clip_model is not None:
162
- self.interrogator.clip_model = self.interrogator.clip_model.to("cuda")
163
- logger.info("CLIP model moved to GPU with native precision")
164
 
165
- if hasattr(self.interrogator, 'blip_model') and self.interrogator.blip_model is not None:
166
- self.interrogator.blip_model = self.interrogator.blip_model.to("cuda")
167
- logger.info("BLIP model moved to GPU with native precision")
 
168
 
169
- # Ejecutar inferencias CLIP con precisión nativa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  full_prompt = self.interrogator.interrogate(image)
171
  clip_fast = self.interrogator.interrogate_fast(image)
172
  clip_classic = self.interrogator.interrogate_classic(image)
@@ -174,26 +250,8 @@ class UltraSupremeOptimizer:
174
  return full_prompt, clip_fast, clip_classic
175
 
176
  except Exception as e:
177
- logger.error(f"CLIP inference error: {e}")
178
- # Si falla en GPU, intentar en CPU
179
- if self.device == "cuda":
180
- logger.info("Falling back to CPU inference")
181
- self.interrogator.config.device = "cpu"
182
-
183
- if hasattr(self.interrogator, 'clip_model') and self.interrogator.clip_model is not None:
184
- self.interrogator.clip_model = self.interrogator.clip_model.to("cpu")
185
-
186
- if hasattr(self.interrogator, 'blip_model') and self.interrogator.blip_model is not None:
187
- self.interrogator.blip_model = self.interrogator.blip_model.to("cpu")
188
-
189
- # Reintentar en CPU
190
- full_prompt = self.interrogator.interrogate(image)
191
- clip_fast = self.interrogator.interrogate_fast(image)
192
- clip_classic = self.interrogator.interrogate_classic(image)
193
-
194
- return full_prompt, clip_fast, clip_classic
195
- else:
196
- raise e
197
 
198
  def generate_ultra_supreme_prompt(self, image: Any) -> Tuple[str, str, int, Dict[str, int]]:
199
  """
@@ -203,9 +261,10 @@ class UltraSupremeOptimizer:
203
  Tuple of (prompt, analysis_info, score, breakdown)
204
  """
205
  try:
206
- # Verificar que el modelo esté inicializado
207
  if not self.is_initialized:
208
- return "❌ Model initialization failed.", "Please refresh and try again.", 0, {}
 
209
 
210
  # Validate input
211
  if image is None:
@@ -222,17 +281,24 @@ class UltraSupremeOptimizer:
222
 
223
  logger.info("ULTRA SUPREME ANALYSIS - Starting pipeline")
224
 
225
- # Ejecutar inferencia CLIP en GPU
226
  full_prompt, clip_fast, clip_classic = self.run_clip_inference(image)
227
 
228
- logger.info(f"Prompt completo de CLIP Interrogator: {full_prompt}")
229
- logger.info(f"Análisis Fast: {clip_fast}")
230
- logger.info(f"Análisis Classic: {clip_classic}")
 
 
 
231
 
232
- # 3. Aplicar reglas de Flux al prompt completo
 
 
 
 
233
  optimized_prompt = self.apply_flux_rules(full_prompt)
234
 
235
- # 4. Crear análisis para el reporte (simplificado)
236
  analysis_summary = {
237
  "base_prompt": full_prompt,
238
  "clip_fast": clip_fast,
@@ -242,7 +308,7 @@ class UltraSupremeOptimizer:
242
  "detected_subject": self._detect_subject(full_prompt)
243
  }
244
 
245
- # 5. Calcular score basado en la riqueza del prompt
246
  score = self._calculate_score(optimized_prompt, full_prompt)
247
  breakdown = {
248
  "base_quality": min(len(full_prompt) // 10, 25),
@@ -268,7 +334,7 @@ class UltraSupremeOptimizer:
268
  return optimized_prompt, analysis_info, score, breakdown
269
 
270
  except Exception as e:
271
- logger.error(f"Ultra supreme generation error: {e}")
272
  return f"❌ Error: {str(e)}", "Please try with a different image.", 0, {}
273
 
274
  def _detect_style(self, prompt: str) -> str:
@@ -281,19 +347,23 @@ class UltraSupremeOptimizer:
281
  "dramatic": ["dramatic", "cinematic", "moody"]
282
  }
283
 
 
284
  for style_name, keywords in styles.items():
285
- if any(keyword in prompt.lower() for keyword in keywords):
286
  return style_name
287
 
288
  return "general"
289
 
290
  def _detect_subject(self, prompt: str) -> str:
291
  """Detecta el sujeto principal del prompt"""
 
 
 
292
  # Tomar las primeras palabras significativas
293
  words = prompt.split(',')[0].split()
294
  if len(words) > 3:
295
  return ' '.join(words[:4])
296
- return prompt.split(',')[0]
297
 
298
  def _calculate_score(self, optimized_prompt: str, base_prompt: str) -> int:
299
  """Calcula el score basado en la calidad del prompt"""
@@ -321,8 +391,9 @@ class UltraSupremeOptimizer:
321
  duration: float) -> str:
322
  """Generate detailed analysis report"""
323
 
324
- gpu_status = "⚡ ZeroGPU" if torch.cuda.is_available() else "💻 CPU"
325
- precision_info = "Native Model Precision" if torch.cuda.is_available() else "CPU Processing"
 
326
 
327
  # Extraer información clave
328
  detected_style = analysis.get("detected_style", "general").title()
@@ -337,8 +408,8 @@ class UltraSupremeOptimizer:
337
  **🧠 INTELLIGENT DETECTION:**
338
  - **Detected Style:** {detected_style}
339
  - **Main Subject:** {detected_subject}
340
- - **Precision:** Using native model precision for optimal performance
341
- - **Quality:** Maximum resolution processing (1024px)
342
 
343
  **📊 CLIP INTERROGATOR ANALYSIS:**
344
  - **Base Prompt:** {base_prompt_preview}
@@ -346,9 +417,9 @@ class UltraSupremeOptimizer:
346
  - **Classic Analysis:** {analysis.get('clip_classic', '')[:80]}...
347
 
348
  **⚡ OPTIMIZATION APPLIED:**
349
- - ✅ Native precision inference for stability
350
- - ✅ GPU acceleration when available
351
- - ✅ Automatic fallback to CPU if needed
352
  - ✅ Added professional camera specifications
353
  - ✅ Enhanced lighting descriptions
354
  - ✅ Applied Flux-specific optimizations
 
30
  self.usage_count = 0
31
  self.device = self._get_device()
32
  self.is_initialized = False
33
+ # NO inicializar modelo aquí - hacerlo lazy
34
+
 
35
  @staticmethod
36
  def _get_device() -> str:
37
  """Determine the best available device for computation"""
 
48
  return True
49
 
50
  try:
51
+ # Configuración para CPU inicialmente
52
  config = Config(
53
  clip_model_name="ViT-L-14/openai",
54
  download_cache=True,
55
  chunk_size=2048,
56
  quiet=True,
57
+ device="cpu" # Siempre inicializar en CPU
58
  )
59
 
60
  self.interrogator = Interrogator(config)
 
62
 
63
  # Clean up memory after initialization
64
  gc.collect()
65
+
66
+ logger.info("Model initialized successfully on CPU")
67
  return True
68
 
69
  except Exception as e:
 
86
  if image.mode != 'RGB':
87
  image = image.convert('RGB')
88
 
89
+ # Resize if too large
90
+ max_size = 768 # Reducir tamaño para evitar problemas de memoria
91
  if image.size[0] > max_size or image.size[1] > max_size:
92
  image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
93
 
 
109
  r',\s*artstation',
110
  r',\s*concept art',
111
  r',\s*digital art',
112
+ r',\s*by greg rutkowski',
113
  ]
114
 
115
  cleaned_prompt = base_prompt
 
148
 
149
  return final_prompt
150
 
151
def _prepare_models_for_gpu(self):
    """Move the interrogator's models to CUDA in half precision (FP16).

    Each of ``caption_model``, ``clip_model`` and ``blip_model`` is
    converted only when the interrogator exposes it and it is not ``None``.
    Afterwards ``interrogator.config.device`` is set to ``"cuda"``.

    Raises:
        Exception: Any error raised while converting or moving a model is
            logged and then re-raised unchanged.
    """
    try:
        for attr in ("caption_model", "clip_model", "blip_model"):
            model = getattr(self.interrogator, attr, None)
            # An attribute can exist but hold None (model not loaded);
            # calling .half() on it would raise, so skip it.
            if model is not None:
                setattr(self.interrogator, attr, model.half().to("cuda"))

        self.interrogator.config.device = "cuda"
        logger.info("Models prepared for GPU with FP16")

    except Exception as e:
        logger.error(f"Error preparing models for GPU: {e}")
        raise
169
+
170
def _prepare_models_for_cpu(self):
    """Move the interrogator's models to the CPU in full precision (FP32).

    Each of ``caption_model``, ``clip_model`` and ``blip_model`` is
    converted only when the interrogator exposes it and it is not ``None``.
    Afterwards ``interrogator.config.device`` is set to ``"cpu"``.

    Raises:
        Exception: Any error raised while converting or moving a model is
            logged and then re-raised unchanged.
    """
    try:
        for attr in ("caption_model", "clip_model", "blip_model"):
            model = getattr(self.interrogator, attr, None)
            # An attribute can exist but hold None (model not loaded);
            # calling .float() on it would raise, so skip it.
            if model is not None:
                setattr(self.interrogator, attr, model.float().to("cpu"))

        self.interrogator.config.device = "cpu"
        logger.info("Models prepared for CPU with FP32")

    except Exception as e:
        logger.error(f"Error preparing models for CPU: {e}")
        raise
188
+
189
@spaces.GPU(duration=60)
def run_clip_inference(self, image: Image.Image) -> Tuple[str, str, str]:
    """Run the three CLIP Interrogator passes on the GPU.

    The models are first switched to FP16 on CUDA, then ``interrogate``,
    ``interrogate_fast`` and ``interrogate_classic`` are called through
    ``_safe_interrogate`` under a float16 autocast context. If anything in
    that path raises, the error is logged and the work is retried through
    ``_run_cpu_inference``.

    Args:
        image: Image passed as-is to the interrogator methods.

    Returns:
        Tuple ``(full_prompt, clip_fast, clip_classic)``.
    """
    try:
        self._prepare_models_for_gpu()

        # The image is handed to the interrogator directly. The manual
        # torchvision preprocessing that used to live here produced a tensor
        # that was never used, so it only added a failure point and a GPU
        # allocation.
        # NOTE(review): the calls are assumed to belong inside the autocast
        # context, as the surrounding comments suggested - confirm.
        with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
            full_prompt = self._safe_interrogate(image, 'interrogate')
            clip_fast = self._safe_interrogate(image, 'interrogate_fast')
            clip_classic = self._safe_interrogate(image, 'interrogate_classic')

        return full_prompt, clip_fast, clip_classic

    except Exception as e:
        logger.error(f"GPU inference error: {e}")
        # Fall back to CPU inference.
        return self._run_cpu_inference(image)
221
+
222
def _safe_interrogate(self, image: Image.Image, method: str) -> str:
    """Call one interrogator method by name, turning failures into a message.

    Args:
        image: Image handed unchanged to the interrogator method.
        method: Name of the method to look up on ``self.interrogator``,
            e.g. ``'interrogate'``.

    Returns:
        The method's result, or ``"Error processing with <method>"`` when
        the lookup or the call raised.
    """
    try:
        return getattr(self.interrogator, method)(image)

    except Exception as e:
        # Keep the traceback in the log; the message alone does not show
        # where inside the interrogator the failure happened.
        logger.error(f"Error in {method}: {e}", exc_info=True)
        return f"Error processing with {method}"
236
+
237
+ def _run_cpu_inference(self, image: Image.Image) -> Tuple[str, str, str]:
238
+ """Ejecuta inferencia en CPU como fallback"""
239
+ try:
240
+ logger.info("Running CPU inference as fallback")
241
+
242
+ # Preparar modelos para CPU
243
+ self._prepare_models_for_cpu()
244
+
245
+ # Ejecutar en CPU sin autocast
246
  full_prompt = self.interrogator.interrogate(image)
247
  clip_fast = self.interrogator.interrogate_fast(image)
248
  clip_classic = self.interrogator.interrogate_classic(image)
 
250
  return full_prompt, clip_fast, clip_classic
251
 
252
  except Exception as e:
253
+ logger.error(f"CPU inference also failed: {e}")
254
+ return "Error: Failed to process image", "Error", "Error"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
 
256
  def generate_ultra_supreme_prompt(self, image: Any) -> Tuple[str, str, int, Dict[str, int]]:
257
  """
 
261
  Tuple of (prompt, analysis_info, score, breakdown)
262
  """
263
  try:
264
+ # Inicializar modelo si no está inicializado
265
  if not self.is_initialized:
266
+ if not self.initialize_model():
267
+ return "❌ Model initialization failed.", "Please refresh and try again.", 0, {}
268
 
269
  # Validate input
270
  if image is None:
 
281
 
282
  logger.info("ULTRA SUPREME ANALYSIS - Starting pipeline")
283
 
284
+ # Ejecutar inferencia CLIP
285
  full_prompt, clip_fast, clip_classic = self.run_clip_inference(image)
286
 
287
+ # Verificar si hubo errores
288
+ if "Error" in full_prompt:
289
+ logger.warning("Using fallback prompt due to inference error")
290
+ full_prompt = "A photograph"
291
+ clip_fast = "image"
292
+ clip_classic = "picture"
293
 
294
+ logger.info(f"Prompt completo: {full_prompt[:100]}...")
295
+ logger.info(f"Fast: {clip_fast[:50]}...")
296
+ logger.info(f"Classic: {clip_classic[:50]}...")
297
+
298
+ # Aplicar reglas de Flux al prompt completo
299
  optimized_prompt = self.apply_flux_rules(full_prompt)
300
 
301
+ # Crear análisis para el reporte
302
  analysis_summary = {
303
  "base_prompt": full_prompt,
304
  "clip_fast": clip_fast,
 
308
  "detected_subject": self._detect_subject(full_prompt)
309
  }
310
 
311
+ # Calcular score
312
  score = self._calculate_score(optimized_prompt, full_prompt)
313
  breakdown = {
314
  "base_quality": min(len(full_prompt) // 10, 25),
 
334
  return optimized_prompt, analysis_info, score, breakdown
335
 
336
  except Exception as e:
337
+ logger.error(f"Ultra supreme generation error: {e}", exc_info=True)
338
  return f"❌ Error: {str(e)}", "Please try with a different image.", 0, {}
339
 
340
  def _detect_style(self, prompt: str) -> str:
 
347
  "dramatic": ["dramatic", "cinematic", "moody"]
348
  }
349
 
350
+ prompt_lower = prompt.lower()
351
  for style_name, keywords in styles.items():
352
+ if any(keyword in prompt_lower for keyword in keywords):
353
  return style_name
354
 
355
  return "general"
356
 
357
  def _detect_subject(self, prompt: str) -> str:
358
  """Detecta el sujeto principal del prompt"""
359
+ if not prompt:
360
+ return "Unknown"
361
+
362
  # Tomar las primeras palabras significativas
363
  words = prompt.split(',')[0].split()
364
  if len(words) > 3:
365
  return ' '.join(words[:4])
366
+ return prompt.split(',')[0] if prompt else "Unknown"
367
 
368
  def _calculate_score(self, optimized_prompt: str, base_prompt: str) -> int:
369
  """Calcula el score basado en la calidad del prompt"""
 
391
  duration: float) -> str:
392
  """Generate detailed analysis report"""
393
 
394
+ device_used = "cuda" if torch.cuda.is_available() else "cpu"
395
+ gpu_status = " ZeroGPU" if device_used == "cuda" else "💻 CPU"
396
+ precision_info = "Half Precision (FP16)" if device_used == "cuda" else "Full Precision (FP32)"
397
 
398
  # Extraer información clave
399
  detected_style = analysis.get("detected_style", "general").title()
 
408
  **🧠 INTELLIGENT DETECTION:**
409
  - **Detected Style:** {detected_style}
410
  - **Main Subject:** {detected_subject}
411
+ - **Precision:** Using {precision_info} for optimal performance
412
+ - **Quality:** Maximum resolution processing (768px)
413
 
414
  **📊 CLIP INTERROGATOR ANALYSIS:**
415
  - **Base Prompt:** {base_prompt_preview}
 
417
  - **Classic Analysis:** {analysis.get('clip_classic', '')[:80]}...
418
 
419
  **⚡ OPTIMIZATION APPLIED:**
420
+ - ✅ Mixed precision handling for stability
421
+ - ✅ Automatic GPU/CPU fallback
422
+ - ✅ Memory-efficient processing
423
  - ✅ Added professional camera specifications
424
  - ✅ Enhanced lighting descriptions
425
  - ✅ Applied Flux-specific optimizations