import spaces import gradio as gr import torch from PIL import Image import numpy as np from clip_interrogator import Config, Interrogator import logging import os import warnings from datetime import datetime import gc import re warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning) os.environ["TOKENIZERS_PARALLELISM"] = "false" logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def get_device(): if torch.cuda.is_available(): return "cuda" elif torch.backends.mps.is_available(): return "mps" else: return "cpu" DEVICE = get_device() class DeepFluxAnalyzer: """ Deep analysis engine that understands image content and applies Flux rules intelligently """ def __init__(self): self.forbidden_elements = ["++", "weights", "white background [en dev]"] # Deep vocabulary for intelligent analysis self.age_descriptors = { "young": ["young", "youthful", "fresh-faced"], "middle": ["middle-aged", "mature"], "elderly": ["elderly", "aged", "distinguished", "weathered"] } self.facial_features = { "beard": ["bearded", "with a full beard", "with facial hair", "with a silver beard", "with a gray beard"], "glasses": ["wearing glasses", "with wire-frame glasses", "with spectacles", "with eyeglasses"], "eyes": ["intense gaze", "piercing eyes", "contemplative expression", "focused stare"] } self.clothing_religious = { "hat": ["black hat", "traditional hat", "religious headwear", "Orthodox hat"], "clothing": ["traditional clothing", "religious attire", "formal wear", "dark clothing"] } self.settings_detailed = { "indoor": ["indoor setting", "interior space", "indoor environment"], "outdoor": ["outdoor setting", "natural environment", "exterior location"], "studio": ["studio setting", "controlled environment", "professional backdrop"] } self.lighting_advanced = { "portrait": ["dramatic portrait lighting", "studio portrait lighting", "professional portrait setup"], "natural": ["natural lighting", "window light", "ambient illumination"], "dramatic": ["dramatic lighting", "high contrast lighting", "chiaroscuro lighting"] } self.technical_professional = { "portrait_lens": ["85mm lens", "135mm lens", "medium telephoto"], "standard_lens": ["50mm lens", "35mm lens", "standard focal length"], "aperture": ["f/1.4 aperture", "f/2.8 aperture", "f/4 aperture"], "camera": ["Shot on Phase One XF", "Shot on Hasselblad", "Shot on Canon EOS R5"] } def analyze_clip_deeply(self, clip_result): """Extract detailed information from CLIP analysis""" clip_lower = clip_result.lower() analysis = { "subjects": [], "age": None, "features": [], "clothing": [], "setting": None, "mood": None, "composition": None } # Subject and age detection if any(word in clip_lower for word in ["man", "person", "male"]): if any(word in clip_lower for word in ["old", "elderly", "aged", "gray", "grey", "silver"]): analysis["subjects"].append("elderly man") analysis["age"] = "elderly" elif any(word in clip_lower for word in ["young", "youth", "boy"]): analysis["subjects"].append("young man") analysis["age"] = "young" else: analysis["subjects"].append("man") analysis["age"] = "middle" if any(word in clip_lower for word in ["woman", "female", "lady"]): if any(word in clip_lower for word in ["old", "elderly", "aged"]): analysis["subjects"].append("elderly woman") analysis["age"] = "elderly" else: analysis["subjects"].append("woman") # Facial features detection if any(word in clip_lower for word in ["beard", "facial hair", "mustache"]): if any(word in clip_lower for word in ["gray", "grey", "silver", "white"]): analysis["features"].append("silver beard") else: analysis["features"].append("beard") if any(word in clip_lower for word in ["glasses", "spectacles", "eyeglasses"]): analysis["features"].append("glasses") # Clothing and accessories if any(word in clip_lower for word in ["hat", "cap", "headwear"]): analysis["clothing"].append("hat") if any(word in clip_lower for word in ["suit", "formal", "dress", "shirt"]): analysis["clothing"].append("formal wear") # Setting detection if any(word in clip_lower for word in ["indoor", "inside", "interior", "room"]): analysis["setting"] = "indoor" elif any(word in clip_lower for word in ["outdoor", "outside", "landscape", "street"]): analysis["setting"] = "outdoor" elif any(word in clip_lower for word in ["studio", "backdrop"]): analysis["setting"] = "studio" # Mood and composition if any(word in clip_lower for word in ["portrait", "headshot", "face", "close-up"]): analysis["composition"] = "portrait" elif any(word in clip_lower for word in ["sitting", "seated", "chair"]): analysis["composition"] = "seated" elif any(word in clip_lower for word in ["standing", "upright"]): analysis["composition"] = "standing" return analysis def build_flux_prompt(self, analysis, clip_base): """Build optimized Flux prompt using deep analysis""" components = [] # 1. Article (intelligent selection) if analysis["subjects"]: subject = analysis["subjects"][0] article = "An" if subject[0] in 'aeiou' else "A" else: article = "A" components.append(article) # 2. Descriptive adjectives (context-aware) adjectives = [] if analysis["age"] == "elderly": adjectives.extend(["distinguished", "weathered"]) elif analysis["age"] == "young": adjectives.extend(["young", "fresh-faced"]) else: adjectives.extend(["professional", "elegant"]) # Add up to 2-3 adjectives as per Flux rules components.extend(adjectives[:2]) # 3. Main subject (enhanced with details) if analysis["subjects"]: main_subject = analysis["subjects"][0] # Add religious/cultural context if detected if "hat" in analysis["clothing"] and "beard" in [f.split()[0] for f in analysis["features"]]: main_subject = "Orthodox Jewish " + main_subject else: main_subject = "subject" components.append(main_subject) # 4. Features integration (intelligent placement) feature_descriptions = [] if "glasses" in analysis["features"]: feature_descriptions.append("with distinctive wire-frame glasses") if any("beard" in f for f in analysis["features"]): if "silver beard" in analysis["features"]: feature_descriptions.append("with a distinguished silver beard") else: feature_descriptions.append("with a full beard") if feature_descriptions: components.extend(feature_descriptions) # 5. Clothing and accessories clothing_desc = [] if "hat" in analysis["clothing"]: clothing_desc.append("wearing a traditional black hat") if "formal wear" in analysis["clothing"]: clothing_desc.append("in formal attire") if clothing_desc: components.extend(clothing_desc) # 6. Verb/Action (based on composition analysis) if analysis["composition"] == "seated": action = "seated contemplatively" elif analysis["composition"] == "standing": action = "standing with dignity" else: action = "positioned thoughtfully" components.append(action) # 7. Context/Location (enhanced setting) setting_map = { "indoor": "in an intimate indoor setting", "outdoor": "in a natural outdoor environment", "studio": "in a professional studio environment" } if analysis["setting"]: context = setting_map.get(analysis["setting"], "in a carefully composed environment") else: context = "in a thoughtfully arranged scene" components.append(context) # 8. Environmental details (lighting-aware) if analysis["composition"] == "portrait": env_detail = "with dramatic portrait lighting that emphasizes facial features and texture" else: env_detail = "captured with sophisticated atmospheric lighting" components.append(env_detail) # 9. Technical specifications (composition-appropriate) if analysis["composition"] == "portrait": tech_spec = "Shot on Phase One XF, 85mm lens, f/2.8 aperture" else: tech_spec = "Shot on Phase One, 50mm lens, f/4 aperture" components.append(tech_spec) # 10. Quality marker (always professional) components.append("professional photography") # Join with proper punctuation prompt = ", ".join(components) # Clean up and optimize prompt = re.sub(r'\s+', ' ', prompt) # Remove extra spaces prompt = prompt.replace(", ,", ",") # Remove double commas return prompt def calculate_intelligence_score(self, prompt, analysis): """Calculate how well the prompt reflects intelligent analysis""" score = 0 # Structure compliance (Flux rules 1-10) if prompt.startswith(("A", "An")): score += 10 # Feature recognition accuracy if len(analysis["features"]) > 0: score += 15 # Context understanding if analysis["setting"]: score += 15 # Subject detail depth if len(analysis["subjects"]) > 0: score += 15 # Technical specs presence if "Phase One" in prompt and "lens" in prompt: score += 15 # Lighting specification if "lighting" in prompt: score += 10 # Composition awareness if analysis["composition"]: score += 10 # Forbidden elements check if not any(forbidden in prompt for forbidden in self.forbidden_elements): score += 10 return min(score, 100) class FluxPromptOptimizer: def __init__(self): self.interrogator = None self.analyzer = DeepFluxAnalyzer() self.usage_count = 0 self.device = DEVICE self.is_initialized = False def initialize_model(self): if self.is_initialized: return True try: config = Config( clip_model_name="ViT-L-14/openai", download_cache=True, chunk_size=2048, quiet=True, device=self.device ) self.interrogator = Interrogator(config) self.is_initialized = True if self.device == "cpu": gc.collect() else: torch.cuda.empty_cache() return True except Exception as e: logger.error(f"Initialization error: {e}") return False def optimize_image(self, image): if image is None: return None if isinstance(image, np.ndarray): image = Image.fromarray(image) elif not isinstance(image, Image.Image): image = Image.open(image) if image.mode != 'RGB': image = image.convert('RGB') max_size = 768 if self.device != "cpu" else 512 if image.size[0] > max_size or image.size[1] > max_size: image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS) return image @spaces.GPU def generate_optimized_prompt(self, image): try: if not self.is_initialized: if not self.initialize_model(): return "❌ Model initialization failed.", "Please refresh and try again.", 0 if image is None: return "❌ Please upload an image.", "No image provided.", 0 self.usage_count += 1 image = self.optimize_image(image) if image is None: return "❌ Image processing failed.", "Invalid image format.", 0 start_time = datetime.now() # Get comprehensive CLIP analysis clip_result = self.interrogator.interrogate(image) # Deep analysis of the CLIP result deep_analysis = self.analyzer.analyze_clip_deeply(clip_result) # Build optimized Flux prompt optimized_prompt = self.analyzer.build_flux_prompt(deep_analysis, clip_result) # Calculate intelligence score score = self.analyzer.calculate_intelligence_score(optimized_prompt, deep_analysis) end_time = datetime.now() duration = (end_time - start_time).total_seconds() # Memory cleanup if self.device == "cpu": gc.collect() else: torch.cuda.empty_cache() # Generate detailed analysis info gpu_status = "⚡ ZeroGPU" if torch.cuda.is_available() else "💻 CPU" features_detected = ", ".join(deep_analysis["features"]) if deep_analysis["features"] else "None" subjects_detected = ", ".join(deep_analysis["subjects"]) if deep_analysis["subjects"] else "Generic" analysis_info = f"""**Deep Analysis Complete** **Processing:** {gpu_status} • {duration:.1f}s **Intelligence Score:** {score}/100 **Generation:** #{self.usage_count} **Detected Elements:** • **Subjects:** {subjects_detected} • **Features:** {features_detected} • **Setting:** {deep_analysis["setting"] or "Unspecified"} • **Composition:** {deep_analysis["composition"] or "Standard"} **CLIP Base:** {clip_result[:80]}... **Flux Enhancement:** Applied deep analysis with Pariente AI rules""" return optimized_prompt, analysis_info, score except Exception as e: logger.error(f"Generation error: {e}") return f"❌ Error: {str(e)}", "Please try with a different image.", 0 optimizer = FluxPromptOptimizer() def process_image_wrapper(image): """Simplified wrapper - no unnecessary options""" try: prompt, info, score = optimizer.generate_optimized_prompt(image) # Create score HTML color = "#22c55e" if score >= 80 else "#f59e0b" if score >= 60 else "#ef4444" score_html = f'''