import spaces import gradio as gr import torch from PIL import Image import numpy as np from clip_interrogator import Config, Interrogator import logging import os import warnings from datetime import datetime import gc import re warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning) os.environ["TOKENIZERS_PARALLELISM"] = "false" logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def get_device(): if torch.cuda.is_available(): return "cuda" elif torch.backends.mps.is_available(): return "mps" else: return "cpu" DEVICE = get_device() class MaximumFluxAnalyzer: """ Maximum depth analysis engine - extracts EVERYTHING possible from images """ def __init__(self): self.forbidden_elements = ["++", "weights", "white background [en dev]"] # EXPANDED VOCABULARIES FOR MAXIMUM DETECTION self.age_keywords = { "elderly": ["old", "elderly", "aged", "senior", "mature", "weathered", "wrinkled", "gray", "grey", "white hair", "silver", "graying", "ancient", "vintage"], "middle": ["middle-aged", "adult", "grown", "middle", "forties", "fifties"], "young": ["young", "youth", "teenage", "boy", "girl", "child", "kid", "adolescent"] } self.facial_features = { "beard_full": ["beard", "bearded", "facial hair", "full beard", "thick beard", "heavy beard"], "beard_color": ["gray beard", "grey beard", "silver beard", "white beard", "salt pepper", "graying beard"], "mustache": ["mustache", "moustache", "facial hair"], "glasses": ["glasses", "spectacles", "eyeglasses", "wire-frame", "rimmed glasses", "reading glasses"], "eyes": ["eyes", "gaze", "stare", "looking", "piercing", "intense", "deep eyes"], "wrinkles": ["wrinkled", "lines", "aged", "weathered", "creased"], "expression": ["serious", "contemplative", "thoughtful", "stern", "wise", "solemn"] } self.religious_cultural = { "jewish": ["jewish", "orthodox", "hasidic", "rabbi", "religious", "traditional", "ceremonial"], "hat_types": ["hat", "cap", "yarmulke", "kippah", "black hat", "traditional hat", "religious headwear"], "clothing": ["suit", "jacket", "formal", "black clothing", "traditional dress", "religious attire"] } self.hair_descriptors = { "color": ["gray", "grey", "silver", "white", "black", "brown", "blonde", "salt and pepper"], "texture": ["curly", "wavy", "straight", "thick", "thin", "coarse", "fine"], "style": ["long", "short", "receding", "balding", "full head"] } self.setting_environments = { "indoor": ["indoor", "inside", "interior", "room", "office", "home", "building"], "formal": ["formal setting", "office", "meeting room", "conference", "official"], "religious": ["synagogue", "temple", "religious", "ceremonial", "sacred"], "studio": ["studio", "backdrop", "professional", "photography studio"], "casual": ["casual", "relaxed", "informal", "comfortable"] } self.lighting_types = { "natural": ["natural light", "window light", "daylight", "sunlight"], "artificial": ["artificial light", "lamp", "electric", "indoor lighting"], "dramatic": ["dramatic", "contrast", "shadow", "chiaroscuro", "moody"], "soft": ["soft", "gentle", "diffused", "even", "flattering"], "harsh": ["harsh", "direct", "strong", "bright", "intense"] } self.composition_styles = { "portrait": ["portrait", "headshot", "face", "facial", "close-up", "bust"], "seated": ["sitting", "seated", "chair", "sitting down"], "standing": ["standing", "upright", "vertical"], "three_quarter": ["three quarter", "three-quarter", "angled", "turned"] } self.quality_adjectives = { "age_based": { "elderly": ["distinguished", "dignified", "venerable", "wise", "weathered", "experienced"], "middle": ["professional", "mature", "confident", "established"], "young": ["youthful", "fresh", "vibrant", "energetic"] }, "cultural": ["traditional", "Orthodox", "religious", "ceremonial", "devout"], "general": ["elegant", "refined", "sophisticated", "classic", "timeless"] } def extract_maximum_info(self, clip_fast, clip_classic, clip_best): """Combine all three CLIP analyses for maximum information extraction""" # Combine all analyses combined_text = f"{clip_fast} {clip_classic} {clip_best}".lower() analysis = { "age": None, "age_confidence": 0, "gender": None, "facial_features": [], "hair_description": [], "clothing_items": [], "cultural_religious": [], "setting": None, "lighting": None, "composition": None, "mood": None, "technical_suggestions": {} } # DEEP AGE DETECTION age_scores = {"elderly": 0, "middle": 0, "young": 0} for age_type, keywords in self.age_keywords.items(): for keyword in keywords: if keyword in combined_text: age_scores[age_type] += 1 if max(age_scores.values()) > 0: analysis["age"] = max(age_scores, key=age_scores.get) analysis["age_confidence"] = age_scores[analysis["age"]] # GENDER DETECTION if any(word in combined_text for word in ["man", "male", "gentleman", "guy", "he", "his"]): analysis["gender"] = "man" elif any(word in combined_text for word in ["woman", "female", "lady", "she", "her"]): analysis["gender"] = "woman" # COMPREHENSIVE FACIAL FEATURES if any(word in combined_text for word in self.facial_features["beard_full"]): if any(word in combined_text for word in self.facial_features["beard_color"]): analysis["facial_features"].append("silver beard") else: analysis["facial_features"].append("full beard") if any(word in combined_text for word in self.facial_features["glasses"]): analysis["facial_features"].append("wire-frame glasses") if any(word in combined_text for word in self.facial_features["wrinkles"]): analysis["facial_features"].append("weathered features") # HAIR ANALYSIS hair_colors = [color for color in self.hair_descriptors["color"] if color in combined_text] if hair_colors: analysis["hair_description"].extend(hair_colors) # CULTURAL/RELIGIOUS DETECTION if any(word in combined_text for word in self.religious_cultural["jewish"]): analysis["cultural_religious"].append("Orthodox Jewish") if any(word in combined_text for word in self.religious_cultural["hat_types"]): analysis["clothing_items"].append("traditional black hat") if any(word in combined_text for word in self.religious_cultural["clothing"]): analysis["clothing_items"].append("formal religious attire") # ENHANCED SETTING DETECTION setting_scores = {} for setting_type, keywords in self.setting_environments.items(): score = sum(1 for keyword in keywords if keyword in combined_text) if score > 0: setting_scores[setting_type] = score if setting_scores: analysis["setting"] = max(setting_scores, key=setting_scores.get) # LIGHTING ANALYSIS lighting_detected = [] for light_type, keywords in self.lighting_types.items(): if any(keyword in combined_text for keyword in keywords): lighting_detected.append(light_type) if lighting_detected: analysis["lighting"] = lighting_detected[0] # Take first/strongest match # COMPOSITION DETECTION for comp_type, keywords in self.composition_styles.items(): if any(keyword in combined_text for keyword in keywords): analysis["composition"] = comp_type break # TECHNICAL SUGGESTIONS BASED ON ANALYSIS if analysis["composition"] == "portrait": analysis["technical_suggestions"] = { "lens": "85mm lens", "aperture": "f/2.8 aperture", "camera": "Shot on Phase One XF" } elif analysis["composition"] == "seated": analysis["technical_suggestions"] = { "lens": "85mm lens", "aperture": "f/4 aperture", "camera": "Shot on Phase One" } else: analysis["technical_suggestions"] = { "lens": "50mm lens", "aperture": "f/2.8 aperture", "camera": "Shot on Phase One" } return analysis def build_maximum_flux_prompt(self, analysis, original_clips): """Build the most detailed Flux prompt possible""" components = [] # 1. INTELLIGENT ARTICLE SELECTION if analysis["cultural_religious"] and analysis["age"]: # "An elderly Orthodox Jewish man" article = "An" if analysis["age"] == "elderly" else "A" elif analysis["gender"]: article = "A" else: article = "A" components.append(article) # 2. CONTEXT-AWARE ADJECTIVES (max 2-3 per Flux rules) adjectives = [] if analysis["age"] and analysis["age"] in self.quality_adjectives["age_based"]: adjectives.extend(self.quality_adjectives["age_based"][analysis["age"]][:2]) if analysis["cultural_religious"]: adjectives.extend(self.quality_adjectives["cultural"][:1]) if not adjectives: adjectives = self.quality_adjectives["general"][:2] # Limit to 2-3 adjectives as per Flux rules components.extend(adjectives[:2]) # 3. ENHANCED SUBJECT DESCRIPTION subject_parts = [] if analysis["cultural_religious"]: subject_parts.extend(analysis["cultural_religious"]) if analysis["age"] and analysis["age"] != "middle": subject_parts.append(analysis["age"]) if analysis["gender"]: subject_parts.append(analysis["gender"]) else: subject_parts.append("person") main_subject = " ".join(subject_parts) components.append(main_subject) # 4. DETAILED FACIAL FEATURES if analysis["facial_features"]: feature_desc = "with " + " and ".join(analysis["facial_features"]) components.append(feature_desc) # 5. CLOTHING AND ACCESSORIES if analysis["clothing_items"]: clothing_desc = "wearing " + " and ".join(analysis["clothing_items"]) components.append(clothing_desc) # 6. ACTION/POSE (based on composition) action_map = { "seated": "seated in contemplative pose", "standing": "standing with dignified presence", "portrait": "captured in intimate portrait style", "three_quarter": "positioned in three-quarter view" } if analysis["composition"]: action = action_map.get(analysis["composition"], "positioned thoughtfully") else: action = "positioned with natural composure" components.append(action) # 7. ENHANCED ENVIRONMENTAL CONTEXT setting_descriptions = { "indoor": "in a warmly lit indoor environment", "formal": "in a professional formal setting", "religious": "in a traditional religious space", "studio": "in a controlled studio environment", "casual": "in a comfortable informal setting" } if analysis["setting"]: context = setting_descriptions.get(analysis["setting"], "in a thoughtfully composed environment") else: context = "within a carefully arranged scene" components.append(context) # 8. SOPHISTICATED LIGHTING DESCRIPTION lighting_descriptions = { "natural": "bathed in gentle natural lighting that enhances facial texture and depth", "dramatic": "illuminated by dramatic lighting that creates compelling shadows and highlights", "soft": "softly lit to emphasize character and warmth", "artificial": "under controlled artificial lighting for optimal detail capture" } if analysis["lighting"]: lighting_desc = lighting_descriptions.get(analysis["lighting"], "with professional lighting that emphasizes facial features and texture") else: lighting_desc = "captured with sophisticated portrait lighting that brings out intricate facial details" components.append(lighting_desc) # 9. TECHNICAL SPECIFICATIONS tech_parts = [] if analysis["technical_suggestions"]: tech_parts.append(analysis["technical_suggestions"]["camera"]) tech_parts.append(analysis["technical_suggestions"]["lens"]) tech_parts.append(analysis["technical_suggestions"]["aperture"]) else: tech_parts = ["Shot on Phase One", "85mm lens", "f/2.8 aperture"] components.append(", ".join(tech_parts)) # 10. QUALITY MARKER components.append("professional portrait photography") # FINAL ASSEMBLY AND OPTIMIZATION prompt = ", ".join(components) # Clean up the prompt prompt = re.sub(r'\s+', ' ', prompt) # Remove extra spaces prompt = re.sub(r',\s*,', ',', prompt) # Remove double commas prompt = prompt.replace(" ,", ",") # Fix spacing around commas # Ensure proper capitalization prompt = prompt[0].upper() + prompt[1:] if prompt else "" return prompt def calculate_maximum_score(self, prompt, analysis): """Calculate intelligence score based on depth of analysis""" score = 0 max_possible = 100 # Structure compliance (10 points) if prompt.startswith(("A", "An")): score += 10 # Feature detection depth (20 points) feature_score = len(analysis["facial_features"]) * 5 score += min(feature_score, 20) # Cultural/contextual awareness (20 points) if analysis["cultural_religious"]: score += 15 if analysis["age"]: score += 5 # Technical appropriateness (15 points) if "85mm" in prompt and analysis["composition"] in ["portrait", "seated"]: score += 15 elif "50mm" in prompt: score += 10 # Lighting sophistication (15 points) if "lighting" in prompt and len(prompt.split("lighting")[1].split(",")[0]) > 10: score += 15 # Setting context (10 points) if analysis["setting"]: score += 10 # Forbidden elements check (10 points) if not any(forbidden in prompt for forbidden in self.forbidden_elements): score += 10 return min(score, max_possible) class MaximumFluxOptimizer: def __init__(self): self.interrogator = None self.analyzer = MaximumFluxAnalyzer() self.usage_count = 0 self.device = DEVICE self.is_initialized = False def initialize_model(self): if self.is_initialized: return True try: config = Config( clip_model_name="ViT-L-14/openai", download_cache=True, chunk_size=2048, quiet=True, device=self.device ) self.interrogator = Interrogator(config) self.is_initialized = True if self.device == "cpu": gc.collect() else: torch.cuda.empty_cache() return True except Exception as e: logger.error(f"Initialization error: {e}") return False def optimize_image(self, image): if image is None: return None if isinstance(image, np.ndarray): image = Image.fromarray(image) elif not isinstance(image, Image.Image): image = Image.open(image) if image.mode != 'RGB': image = image.convert('RGB') max_size = 768 if self.device != "cpu" else 512 if image.size[0] > max_size or image.size[1] > max_size: image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS) return image @spaces.GPU def generate_maximum_prompt(self, image): try: if not self.is_initialized: if not self.initialize_model(): return "❌ Model initialization failed.", "Please refresh and try again.", 0 if image is None: return "❌ Please upload an image.", "No image provided.", 0 self.usage_count += 1 image = self.optimize_image(image) if image is None: return "❌ Image processing failed.", "Invalid image format.", 0 start_time = datetime.now() # TRIPLE CLIP ANALYSIS FOR MAXIMUM INFORMATION logger.info("Starting MAXIMUM analysis - Triple CLIP interrogation") clip_fast = self.interrogator.interrogate_fast(image) clip_classic = self.interrogator.interrogate_classic(image) clip_best = self.interrogator.interrogate(image) logger.info(f"CLIP Results:\nFast: {clip_fast}\nClassic: {clip_classic}\nBest: {clip_best}") # MAXIMUM DEPTH ANALYSIS deep_analysis = self.analyzer.extract_maximum_info(clip_fast, clip_classic, clip_best) # BUILD MAXIMUM QUALITY FLUX PROMPT optimized_prompt = self.analyzer.build_maximum_flux_prompt(deep_analysis, [clip_fast, clip_classic, clip_best]) # CALCULATE INTELLIGENCE SCORE score = self.analyzer.calculate_maximum_score(optimized_prompt, deep_analysis) end_time = datetime.now() duration = (end_time - start_time).total_seconds() # Memory cleanup if self.device == "cpu": gc.collect() else: torch.cuda.empty_cache() # COMPREHENSIVE ANALYSIS REPORT gpu_status = "⚡ ZeroGPU" if torch.cuda.is_available() else "💻 CPU" # Format detected elements features = ", ".join(deep_analysis["facial_features"]) if deep_analysis["facial_features"] else "None detected" cultural = ", ".join(deep_analysis["cultural_religious"]) if deep_analysis["cultural_religious"] else "None detected" clothing = ", ".join(deep_analysis["clothing_items"]) if deep_analysis["clothing_items"] else "None detected" analysis_info = f"""**MAXIMUM ANALYSIS COMPLETE** **Processing:** {gpu_status} • {duration:.1f}s • Triple CLIP interrogation **Intelligence Score:** {score}/100 **Analysis Confidence:** {deep_analysis.get("age_confidence", 0)} age indicators detected **Generation:** #{self.usage_count} **DEEP DETECTION RESULTS:** • **Age Category:** {deep_analysis.get("age", "Unspecified").title()} • **Cultural Context:** {cultural} • **Facial Features:** {features} • **Clothing/Accessories:** {clothing} • **Setting:** {deep_analysis.get("setting", "Standard").title()} • **Composition:** {deep_analysis.get("composition", "Standard").title()} • **Lighting:** {deep_analysis.get("lighting", "Standard").title()} **CLIP ANALYSIS SOURCES:** • **Fast:** {clip_fast[:60]}... • **Classic:** {clip_classic[:60]}... • **Best:** {clip_best[:60]}... **FLUX OPTIMIZATION:** Applied maximum depth analysis with Pariente AI research rules""" return optimized_prompt, analysis_info, score except Exception as e: logger.error(f"Maximum generation error: {e}") return f"❌ Error: {str(e)}", "Please try with a different image.", 0 optimizer = MaximumFluxOptimizer() def process_maximum_analysis(image): """Maximum analysis wrapper""" try: prompt, info, score = optimizer.generate_maximum_prompt(image) # Enhanced score display if score >= 90: color = "#10b981" grade = "EXCELLENT" elif score >= 80: color = "#22c55e" grade = "VERY GOOD" elif score >= 70: color = "#f59e0b" grade = "GOOD" elif score >= 60: color = "#f97316" grade = "FAIR" else: color = "#ef4444" grade = "NEEDS WORK" score_html = f'''