import spaces import gradio as gr import torch from PIL import Image import numpy as np from clip_interrogator import Config, Interrogator import logging import os import warnings from datetime import datetime import gc import re # Suppress warnings warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning) os.environ["TOKENIZERS_PARALLELISM"] = "false" logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def get_device(): if torch.cuda.is_available(): return "cuda" elif torch.backends.mps.is_available(): return "mps" else: return "cpu" DEVICE = get_device() class FluxRulesEngine: """ Flux prompt optimization based on Pariente AI research Implements structured prompt generation following validated rules """ def __init__(self): self.forbidden_elements = ["++", "weights", "white background [en dev]"] self.articles = ["a", "an", "the"] self.quality_adjectives = [ "majestic", "pristine", "sleek", "elegant", "dramatic", "cinematic", "professional", "stunning", "refined" ] self.lighting_types = [ "golden hour", "studio lighting", "dramatic lighting", "ambient lighting", "natural light", "soft lighting", "rim lighting", "volumetric lighting" ] self.technical_specs = [ "Shot on Phase One", "f/2.8 aperture", "50mm lens", "85mm lens", "35mm lens", "professional photography", "medium format", "high resolution" ] self.materials = [ "metallic", "glass", "chrome", "leather", "fabric", "wood", "concrete", "steel", "ceramic" ] def extract_subject(self, base_prompt): """Extract main subject from CLIP analysis""" words = base_prompt.lower().split() # Common subjects to identify subjects = [ "car", "vehicle", "automobile", "person", "man", "woman", "building", "house", "landscape", "mountain", "tree", "flower", "animal", "dog", "cat", "bird" ] for word in words: if word in subjects: return word # Fallback to first noun-like word return words[0] if words else "subject" def detect_setting(self, base_prompt): """Detect environmental context""" prompt_lower = base_prompt.lower() settings = { "studio": ["studio", "backdrop", "seamless"], "outdoor": ["outdoor", "outside", "landscape", "nature"], "urban": ["city", "street", "urban", "building"], "coastal": ["beach", "ocean", "coast", "sea"], "indoor": ["room", "interior", "inside", "home"] } for setting, keywords in settings.items(): if any(keyword in prompt_lower for keyword in keywords): return setting return "neutral environment" def optimize_for_flux(self, base_prompt, style_preference="professional"): """Apply Flux-specific optimization rules""" # Clean forbidden elements cleaned_prompt = base_prompt for forbidden in self.forbidden_elements: cleaned_prompt = cleaned_prompt.replace(forbidden, "") # Extract key elements subject = self.extract_subject(base_prompt) setting = self.detect_setting(base_prompt) # Build structured prompt components = [] # 1. Article article = "A" if subject[0] not in 'aeiou' else "An" components.append(article) # 2. Descriptive adjectives (max 2-3) adjectives = ["elegant", "professional"] # Fixed instead of random components.extend(adjectives) # 3. Main subject components.append(subject) # 4. Verb/Action (gerund form) if "person" in subject or "man" in subject or "woman" in subject: action = "standing" else: action = "positioned" components.append(action) # 5. Context/Location context_map = { "studio": "in a professional studio setting", "outdoor": "in a natural outdoor environment", "urban": "on an urban street", "coastal": "along a dramatic coastline", "indoor": "in an elegant interior space" } components.append(context_map.get(setting, "in a carefully composed scene")) # 6. Environmental details components.append("with subtle atmospheric effects") # 7. Materials/Textures (if applicable) if any(mat in base_prompt.lower() for mat in ["car", "vehicle", "metal"]): components.append("featuring metallic surfaces") # 8. Lighting effects components.append("illuminated by golden hour lighting") # 9. Technical specs components.append("Shot on Phase One, f/2.8 aperture") # 10. Quality/Style if style_preference == "cinematic": quality = "cinematic composition" elif style_preference == "commercial": quality = "commercial photography quality" else: quality = "professional photography" components.append(quality) # Join components with proper punctuation prompt = ", ".join(components) # Capitalize first letter prompt = prompt[0].upper() + prompt[1:] return prompt def get_optimization_score(self, prompt): """Calculate optimization score for Flux compatibility""" score = 0 # Structure check (order compliance) if prompt.startswith(("A", "An", "The")): score += 15 # Technical specs presence if any(spec in prompt for spec in self.technical_specs): score += 20 # Lighting specification if any(light in prompt.lower() for light in self.lighting_types): score += 15 # No forbidden elements if not any(forbidden in prompt for forbidden in self.forbidden_elements): score += 15 # Proper punctuation and structure if "," in prompt: score += 10 # Length optimization word_count = len(prompt.split()) if 15 <= word_count <= 35: score += 25 elif 10 <= word_count <= 45: score += 15 return min(score, 100) class FluxPromptOptimizer: def __init__(self): self.interrogator = None self.flux_engine = FluxRulesEngine() self.usage_count = 0 self.device = DEVICE self.is_initialized = False def initialize_model(self): if self.is_initialized: return True try: config = Config( clip_model_name="ViT-L-14/openai", download_cache=True, chunk_size=2048, quiet=True, device=self.device ) self.interrogator = Interrogator(config) self.is_initialized = True if self.device == "cpu": gc.collect() else: torch.cuda.empty_cache() return True except Exception as e: logger.error(f"Initialization error: {e}") return False def optimize_image(self, image): if image is None: return None if isinstance(image, np.ndarray): image = Image.fromarray(image) elif not isinstance(image, Image.Image): image = Image.open(image) if image.mode != 'RGB': image = image.convert('RGB') # Optimize image size for processing max_size = 768 if self.device != "cpu" else 512 if image.size[0] > max_size or image.size[1] > max_size: image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS) return image @spaces.GPU def generate_optimized_prompt(self, image, style_preference="professional", mode="best"): try: if not self.is_initialized: if not self.initialize_model(): return "❌ Model initialization failed.", "Please refresh and try again.", 0 if image is None: return "❌ Please upload an image.", "No image provided.", 0 self.usage_count += 1 image = self.optimize_image(image) if image is None: return "❌ Image processing failed.", "Invalid image format.", 0 start_time = datetime.now() # Get base analysis from CLIP try: if mode == "fast": base_prompt = self.interrogator.interrogate_fast(image) elif mode == "classic": base_prompt = self.interrogator.interrogate_classic(image) else: base_prompt = self.interrogator.interrogate(image) except Exception as e: base_prompt = self.interrogator.interrogate_fast(image) # Apply Flux-specific optimization optimized_prompt = self.flux_engine.optimize_for_flux(base_prompt, style_preference) # Calculate optimization score score = self.flux_engine.get_optimization_score(optimized_prompt) end_time = datetime.now() duration = (end_time - start_time).total_seconds() # Memory cleanup if self.device == "cpu": gc.collect() else: torch.cuda.empty_cache() # Generate analysis info gpu_status = "⚡ ZeroGPU" if torch.cuda.is_available() else "💻 CPU" analysis_info = f"""**Analysis Complete** **Processing:** {gpu_status} • {duration:.1f}s • {mode.title()} mode **Style:** {style_preference.title()} photography **Optimization Score:** {score}/100 **Generation:** #{self.usage_count} **Base Analysis:** {base_prompt[:100]}... **Enhancement:** Applied Flux-specific structure and terminology""" return optimized_prompt, analysis_info, score except Exception as e: logger.error(f"Generation error: {e}") return f"❌ Error: {str(e)}", "Please try with a different image.", 0 optimizer = FluxPromptOptimizer() def process_image_wrapper(image, style_preference, mode): """Simple wrapper without progress callbacks""" try: prompt, info, score = optimizer.generate_optimized_prompt(image, style_preference, mode) # Create score HTML color = "#22c55e" if score >= 80 else "#f59e0b" if score >= 60 else "#ef4444" score_html = f'''