# Hugging Face Space (status badge at capture time: "Running on Zero")
# NOTE(review): `spaces` is kept as the very first import, as in the original
# ordering. On ZeroGPU Spaces it is presumably expected to be imported before
# torch-based packages -- confirm before reordering.
import spaces

import gc
import logging
import os
import re
import warnings
from datetime import datetime

import gradio as gr
import numpy as np
import torch
from clip_interrogator import Config, Interrogator
from PIL import Image

# Hide FutureWarning / UserWarning output for the whole process.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Ask the tokenizers library not to use its own parallelism.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def get_device():
    """Return the name of the preferred torch device.

    Returns:
        "cuda" when CUDA is available, otherwise "mps" when the MPS backend
        exists and reports itself available, otherwise "cpu".
    """
    if torch.cuda.is_available():
        return "cuda"
    # `torch.backends.mps` is missing on some PyTorch builds; look it up
    # defensively instead of raising AttributeError at import time.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"


# Resolved once at import time and shared by the rest of the module.
DEVICE = get_device()
class DeepFluxAnalyzer:
    """Keyword-based analyzer that turns a CLIP caption into a Flux prompt.

    The pipeline has three steps: `analyze_clip_deeply` extracts coarse
    attributes from the caption text, `build_flux_prompt` assembles a prompt
    from those attributes, and `calculate_intelligence_score` rates the
    result on a 0-100 scale.
    """

    def __init__(self):
        # Substrings that cost the prompt points in the scoring step.
        self.forbidden_elements = ["++", "weights", "white background [en dev]"]
        # Descriptive vocabularies. They are exposed as attributes but are not
        # read by any method of this class.
        self.age_descriptors = {
            "young": ["young", "youthful", "fresh-faced"],
            "middle": ["middle-aged", "mature"],
            "elderly": ["elderly", "aged", "distinguished", "weathered"],
        }
        self.facial_features = {
            "beard": ["bearded", "with a full beard", "with facial hair", "with a silver beard", "with a gray beard"],
            "glasses": ["wearing glasses", "with wire-frame glasses", "with spectacles", "with eyeglasses"],
            "eyes": ["intense gaze", "piercing eyes", "contemplative expression", "focused stare"],
        }
        self.clothing_religious = {
            "hat": ["black hat", "traditional hat", "religious headwear", "Orthodox hat"],
            "clothing": ["traditional clothing", "religious attire", "formal wear", "dark clothing"],
        }
        self.settings_detailed = {
            "indoor": ["indoor setting", "interior space", "indoor environment"],
            "outdoor": ["outdoor setting", "natural environment", "exterior location"],
            "studio": ["studio setting", "controlled environment", "professional backdrop"],
        }
        self.lighting_advanced = {
            "portrait": ["dramatic portrait lighting", "studio portrait lighting", "professional portrait setup"],
            "natural": ["natural lighting", "window light", "ambient illumination"],
            "dramatic": ["dramatic lighting", "high contrast lighting", "chiaroscuro lighting"],
        }
        self.technical_professional = {
            "portrait_lens": ["85mm lens", "135mm lens", "medium telephoto"],
            "standard_lens": ["50mm lens", "35mm lens", "standard focal length"],
            "aperture": ["f/1.4 aperture", "f/2.8 aperture", "f/4 aperture"],
            "camera": ["Shot on Phase One XF", "Shot on Hasselblad", "Shot on Canon EOS R5"],
        }

    @staticmethod
    def _mentions(text, terms):
        """Return True when any of `terms` occurs in `text` as a whole word.

        A trailing "s"/"es" is tolerated so simple plurals ("hats",
        "indoors") still match. Whole-word matching avoids substring false
        positives such as "man" inside "woman" or "cap" inside "landscape".
        """
        alternatives = "|".join(re.escape(term) for term in terms)
        return re.search(rf"\b(?:{alternatives})(?:s|es)?\b", text) is not None

    def analyze_clip_deeply(self, clip_result):
        """Extract coarse attributes from a CLIP caption.

        Args:
            clip_result: Caption text; matching is case-insensitive. A falsy
                value is treated as an empty caption.

        Returns:
            A dict with keys "subjects" (list), "age", "features" (list),
            "clothing" (list), "setting", "mood" and "composition". "mood" is
            always None: no rule in this method sets it.
        """
        caption = (clip_result or "").lower()
        mentions = self._mentions
        analysis = {
            "subjects": [],
            "age": None,
            "features": [],
            "clothing": [],
            "setting": None,
            "mood": None,
            "composition": None,
        }

        # Subject and age detection. A gender-neutral "person" falls into the
        # "man" branch, as in the original rules.
        if mentions(caption, ["man", "men", "person", "male"]):
            if mentions(caption, ["old", "older", "elderly", "aged", "gray", "grey", "silver"]):
                analysis["subjects"].append("elderly man")
                analysis["age"] = "elderly"
            elif mentions(caption, ["young", "younger", "youth", "youthful", "boy"]):
                analysis["subjects"].append("young man")
                analysis["age"] = "young"
            else:
                analysis["subjects"].append("man")
                analysis["age"] = "middle"
        if mentions(caption, ["woman", "women", "female", "lady", "ladies"]):
            if mentions(caption, ["old", "older", "elderly", "aged"]):
                analysis["subjects"].append("elderly woman")
                analysis["age"] = "elderly"
            else:
                # Age is intentionally left as-is for a non-elderly woman.
                analysis["subjects"].append("woman")

        # Facial features.
        if mentions(caption, ["beard", "bearded", "facial hair", "mustache", "moustache"]):
            if mentions(caption, ["gray", "grey", "silver", "white"]):
                analysis["features"].append("silver beard")
            else:
                analysis["features"].append("beard")
        if mentions(caption, ["glasses", "spectacles", "eyeglasses"]):
            analysis["features"].append("glasses")

        # Clothing and accessories.
        if mentions(caption, ["hat", "cap", "headwear"]):
            analysis["clothing"].append("hat")
        if mentions(caption, ["suit", "formal", "dress", "shirt"]):
            analysis["clothing"].append("formal wear")

        # Setting: the first matching category wins.
        if mentions(caption, ["indoor", "inside", "interior", "room"]):
            analysis["setting"] = "indoor"
        elif mentions(caption, ["outdoor", "outside", "landscape", "street"]):
            analysis["setting"] = "outdoor"
        elif mentions(caption, ["studio", "backdrop"]):
            analysis["setting"] = "studio"

        # Composition: the first matching category wins.
        if mentions(caption, ["portrait", "headshot", "face", "close-up"]):
            analysis["composition"] = "portrait"
        elif mentions(caption, ["sitting", "seated", "chair"]):
            analysis["composition"] = "seated"
        elif mentions(caption, ["standing", "upright"]):
            analysis["composition"] = "standing"

        return analysis

    def build_flux_prompt(self, analysis, clip_base):
        """Assemble a comma-separated Flux prompt from an analysis dict.

        Args:
            analysis: Dict shaped like the return value of
                `analyze_clip_deeply`.
            clip_base: Raw CLIP caption. Accepted for interface compatibility;
                it is not used when building the prompt.

        Returns:
            The prompt string. It opens with a single noun phrase
            ("A distinguished, weathered elderly man") followed by feature,
            clothing, pose, setting, lighting, camera and quality clauses.
        """
        subjects = analysis["subjects"]
        features = analysis["features"]
        clothing = analysis["clothing"]
        composition = analysis["composition"]

        # Adjectives depend on the detected age bracket.
        if analysis["age"] == "elderly":
            adjectives = ["distinguished", "weathered"]
        elif analysis["age"] == "young":
            adjectives = ["young", "fresh-faced"]
        else:
            adjectives = ["professional", "elegant"]

        if subjects:
            main_subject = subjects[0]
            # NOTE(review): this labels any subject with a hat and a plain
            # "beard" feature as Orthodox Jewish. The check is preserved
            # exactly as it was (it does not fire for "silver beard"), but
            # inferring religion from two keywords deserves a second look.
            if "hat" in clothing and "beard" in [f.split()[0] for f in features]:
                main_subject = "Orthodox Jewish " + main_subject
        else:
            main_subject = "subject"

        # Drop adjectives the subject already contains ("young young man").
        subject_words = main_subject.split()
        adjectives = [adj for adj in adjectives if adj not in subject_words]

        # Build the opening as one noun phrase and pick the article from the
        # word that actually follows it.
        lead = main_subject
        if adjectives:
            lead = ", ".join(adjectives) + " " + main_subject
        article = "An" if lead[0].lower() in "aeiou" else "A"
        components = [f"{article} {lead}"]

        # Facial features.
        if "glasses" in features:
            components.append("with distinctive wire-frame glasses")
        if any("beard" in feature for feature in features):
            if "silver beard" in features:
                components.append("with a distinguished silver beard")
            else:
                components.append("with a full beard")

        # Clothing and accessories.
        if "hat" in clothing:
            components.append("wearing a traditional black hat")
        if "formal wear" in clothing:
            components.append("in formal attire")

        # Pose.
        if composition == "seated":
            components.append("seated contemplatively")
        elif composition == "standing":
            components.append("standing with dignity")
        else:
            components.append("positioned thoughtfully")

        # Setting.
        setting_map = {
            "indoor": "in an intimate indoor setting",
            "outdoor": "in a natural outdoor environment",
            "studio": "in a professional studio environment",
        }
        if analysis["setting"]:
            components.append(
                setting_map.get(analysis["setting"], "in a carefully composed environment")
            )
        else:
            components.append("in a thoughtfully arranged scene")

        # Lighting and camera details follow the composition.
        if composition == "portrait":
            components.append(
                "with dramatic portrait lighting that emphasizes facial features and texture"
            )
            components.append("Shot on Phase One XF, 85mm lens, f/2.8 aperture")
        else:
            components.append("captured with sophisticated atmospheric lighting")
            components.append("Shot on Phase One, 50mm lens, f/4 aperture")

        components.append("professional photography")

        prompt = ", ".join(components)
        prompt = re.sub(r"\s+", " ", prompt)  # collapse runs of whitespace
        prompt = prompt.replace(", ,", ",")  # drop empty comma slots
        return prompt

    def calculate_intelligence_score(self, prompt, analysis):
        """Score a prompt from 0 to 100 against the analysis it came from.

        Points: leading article (10), detected features (15), setting (15),
        subjects (15), "Phase One" plus "lens" in the prompt (15), "lighting"
        in the prompt (10), composition (10), no forbidden element (10).
        """
        score = 0
        # Require the article to be a whole word; a bare "A" prefix would
        # also accept prompts such as "Abstract ...".
        if prompt.startswith(("A ", "An ")):
            score += 10
        if analysis["features"]:
            score += 15
        if analysis["setting"]:
            score += 15
        if analysis["subjects"]:
            score += 15
        if "Phase One" in prompt and "lens" in prompt:
            score += 15
        if "lighting" in prompt:
            score += 10
        if analysis["composition"]:
            score += 10
        if not any(forbidden in prompt for forbidden in self.forbidden_elements):
            score += 10
        return min(score, 100)
class FluxPromptOptimizer:
    """Runs CLIP interrogation on an image and builds a Flux prompt from it.

    The CLIP Interrogator model is created lazily on first use and kept on
    the instance afterwards.

    NOTE(review): `spaces` is imported by this module but no `@spaces.GPU`
    decorator is applied in the visible code -- confirm GPU inference works
    as intended on ZeroGPU hardware.
    """

    def __init__(self):
        self.interrogator = None
        self.analyzer = DeepFluxAnalyzer()
        self.usage_count = 0
        self.device = DEVICE
        self.is_initialized = False

    @staticmethod
    def _release_memory():
        """Run a garbage-collection pass and drop cached CUDA memory if any."""
        gc.collect()
        # Guarded so non-CUDA devices (e.g. "mps") never hit the CUDA call.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def initialize_model(self):
        """Create the CLIP Interrogator once.

        Returns:
            True when the model is ready (including when it was already
            initialized), False when construction raised.
        """
        if self.is_initialized:
            return True
        try:
            config = Config(
                clip_model_name="ViT-L-14/openai",
                download_cache=True,
                chunk_size=2048,
                quiet=True,
                device=self.device,
            )
            self.interrogator = Interrogator(config)
            self.is_initialized = True
            self._release_memory()
            return True
        except Exception as e:
            # logger.exception keeps the traceback, which a bare message loses.
            logger.exception("Initialization error: %s", e)
            return False

    def optimize_image(self, image):
        """Normalize an input image for interrogation.

        Args:
            image: None, a numpy array, a PIL image, or anything accepted by
                `Image.open`.

        Returns:
            An RGB PIL image whose longest side is at most 768 px (512 px when
            running on "cpu"), or None when `image` is None. The caller's
            image object is never modified.
        """
        if image is None:
            return None
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        elif not isinstance(image, Image.Image):
            # Copy the pixels out so the underlying file can be closed.
            with Image.open(image) as opened:
                image = opened.copy()
        if image.mode != "RGB":
            image = image.convert("RGB")
        max_size = 768 if self.device != "cpu" else 512
        if max(image.size) > max_size:
            # thumbnail() resizes in place; work on a copy so the object the
            # caller passed in keeps its original size.
            image = image.copy()
            image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
        return image

    def generate_optimized_prompt(self, image):
        """Analyze `image` and return the optimized prompt with a report.

        Returns:
            A `(prompt, analysis_markdown, score)` tuple. On any failure the
            first two items carry user-facing error text and the score is 0;
            exceptions are logged and never propagate to the caller.
        """
        try:
            # Reject missing input first so no model is loaded just to
            # report that nothing was uploaded.
            if image is None:
                return "❌ Please upload an image.", "No image provided.", 0
            if not self.is_initialized and not self.initialize_model():
                return "❌ Model initialization failed.", "Please refresh and try again.", 0

            self.usage_count += 1
            image = self.optimize_image(image)
            if image is None:
                return "❌ Image processing failed.", "Invalid image format.", 0

            start_time = datetime.now()
            clip_result = self.interrogator.interrogate(image)
            deep_analysis = self.analyzer.analyze_clip_deeply(clip_result)
            optimized_prompt = self.analyzer.build_flux_prompt(deep_analysis, clip_result)
            score = self.analyzer.calculate_intelligence_score(optimized_prompt, deep_analysis)
            duration = (datetime.now() - start_time).total_seconds()

            self._release_memory()

            gpu_status = "⚡ ZeroGPU" if torch.cuda.is_available() else "💻 CPU"
            features_detected = ", ".join(deep_analysis["features"]) or "None"
            subjects_detected = ", ".join(deep_analysis["subjects"]) or "Generic"
            # Only mark the caption as truncated when it really was.
            clip_preview = clip_result[:80] + ("..." if len(clip_result) > 80 else "")

            # Markdown folds single newlines into one paragraph, so every
            # report line is separated by a blank line.
            analysis_info = "\n\n".join(
                [
                    "**Deep Analysis Complete**",
                    f"**Processing:** {gpu_status} • {duration:.1f}s",
                    f"**Intelligence Score:** {score}/100",
                    f"**Generation:** #{self.usage_count}",
                    "**Detected Elements:**",
                    f"• **Subjects:** {subjects_detected}",
                    f"• **Features:** {features_detected}",
                    f"• **Setting:** {deep_analysis['setting'] or 'Unspecified'}",
                    f"• **Composition:** {deep_analysis['composition'] or 'Standard'}",
                    f"**CLIP Base:** {clip_preview}",
                    "**Flux Enhancement:** Applied deep analysis with Pariente AI rules",
                ]
            )
            return optimized_prompt, analysis_info, score
        except Exception as e:
            logger.exception("Generation error: %s", e)
            return f"❌ Error: {str(e)}", "Please try with a different image.", 0
# Single shared optimizer; the CLIP model inside it is loaded on first use.
optimizer = FluxPromptOptimizer()


def process_image_wrapper(image):
    """Gradio click handler: analyze `image` and format the three outputs.

    Returns:
        A `(prompt, analysis_markdown, score_html)` tuple. The score box is
        green for scores of 80 and above, amber for 60 and above, and red
        otherwise. If anything raises, fixed error placeholders are returned
        instead of propagating the exception.
    """
    try:
        prompt, info, score = optimizer.generate_optimized_prompt(image)

        if score >= 80:
            color = "#22c55e"
        elif score >= 60:
            color = "#f59e0b"
        else:
            color = "#ef4444"

        score_html = f'''
<div style="text-align: center; padding: 1rem; background: linear-gradient(135deg, #f0fdf4 0%, #dcfce7 100%); border: 2px solid {color}; border-radius: 12px; margin: 1rem 0;">
<div style="font-size: 2rem; font-weight: 700; color: {color}; margin: 0;">{score}</div>
<div style="font-size: 0.875rem; color: #15803d; margin: 0; text-transform: uppercase; letter-spacing: 0.05em;">Intelligence Score</div>
</div>
'''
        return prompt, info, score_html
    except Exception as e:
        # Keep the traceback in the log; the UI only gets a short message.
        logger.exception("Wrapper error: %s", e)
        return "❌ Processing failed", f"Error: {str(e)}", '<div style="text-align: center; color: red;">Error</div>'
def clear_outputs():
    """Gradio click handler: free memory and reset the three output widgets.

    Returns:
        A tuple of an empty prompt, an empty analysis text, and the
        placeholder score HTML showing "--".
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    placeholder_score = (
        '<div style="text-align: center; padding: 1rem;">'
        '<div style="font-size: 2rem; color: #ccc;">--</div>'
        '<div style="font-size: 0.875rem; color: #999;">Intelligence Score</div>'
        '</div>'
    )
    return "", "", placeholder_score
def create_interface():
    """Build the Gradio Blocks UI and wire up its two buttons.

    Layout: a header banner, a left column with the image input and the
    analyze button, and a right column with the prompt, score and analysis
    outputs plus a clear button.

    Returns:
        The assembled `gr.Blocks` interface (not yet launched).
    """
    css = """
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
        font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
        background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%) !important;
    }
    .main-header {
        text-align: center;
        padding: 2rem 0 3rem 0;
        background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
        color: white;
        margin: -2rem -2rem 2rem -2rem;
        border-radius: 0 0 24px 24px;
    }
    .main-title {
        font-size: 2.5rem !important;
        font-weight: 700 !important;
        margin: 0 0 0.5rem 0 !important;
        letter-spacing: -0.025em !important;
        background: linear-gradient(135deg, #60a5fa 0%, #3b82f6 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
    }
    .subtitle {
        font-size: 1.125rem !important;
        font-weight: 400 !important;
        opacity: 0.8 !important;
        margin: 0 !important;
    }
    .prompt-output {
        font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace !important;
        font-size: 14px !important;
        line-height: 1.6 !important;
        background: linear-gradient(135deg, #ffffff 0%, #f8fafc 100%) !important;
        border: 1px solid #e2e8f0 !important;
        border-radius: 12px !important;
        padding: 1.5rem !important;
        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1) !important;
    }
    """

    # Shown in the score box until the first analysis finishes.
    empty_score_html = (
        '<div style="text-align: center; padding: 1rem;">'
        '<div style="font-size: 2rem; color: #ccc;">--</div>'
        '<div style="font-size: 0.875rem; color: #999;">Intelligence Score</div>'
        '</div>'
    )

    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="Flux Prompt Optimizer",
        css=css,
    ) as interface:
        gr.HTML("""
        <div class="main-header">
        <div class="main-title">⚡ Flux Prompt Optimizer</div>
        <div class="subtitle">Deep AI analysis • Intelligent prompt generation • Research-based optimization</div>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## 📷 Image Analysis")
                image_input = gr.Image(
                    label="Upload your image",
                    type="pil",
                    height=400,
                )
                optimize_btn = gr.Button(
                    "🧠 Analyze & Optimize",
                    variant="primary",
                    size="lg",
                )
                # Paragraphs are separated by blank lines; without them
                # Markdown folds the heading text and bullets together.
                gr.Markdown(
                    "### Deep Analysis Engine\n\n"
                    "This system performs comprehensive image analysis:\n\n"
                    "• **Subject Recognition** - Identifies people, objects, context\n\n"
                    "• **Feature Detection** - Facial features, clothing, accessories\n\n"
                    "• **Composition Analysis** - Lighting, setting, mood\n\n"
                    "• **Flux Optimization** - Applies research-validated rules\n\n"
                    "No options needed - the AI decides what's optimal."
                )

            with gr.Column(scale=1):
                gr.Markdown("## 🎯 Optimized Result")
                prompt_output = gr.Textbox(
                    label="Flux-Optimized Prompt",
                    placeholder="Upload an image to see the intelligent analysis and optimization...",
                    lines=8,
                    max_lines=12,
                    elem_classes=["prompt-output"],
                    show_copy_button=True,
                )
                score_output = gr.HTML(value=empty_score_html)
                info_output = gr.Markdown(value="")
                clear_btn = gr.Button("🗑️ Clear", size="sm")

        gr.Markdown(
            "---\n\n"
            "### 🔬 Pariente AI Research Foundation\n\n"
            "This optimizer implements deep computer vision analysis combined with validated Flux prompt engineering rules.\n\n"
            "The system intelligently recognizes image content and applies structured optimization without requiring user configuration.\n\n"
            "**Research-based • Intelligence-driven • Zero configuration needed**"
        )

        # Both handlers write to the same three widgets, in the same order.
        result_widgets = [prompt_output, info_output, score_output]
        optimize_btn.click(
            fn=process_image_wrapper,
            inputs=[image_input],
            outputs=result_widgets,
        )
        clear_btn.click(
            fn=clear_outputs,
            outputs=result_widgets,
        )

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces,
    # port 7860, with error details shown in the browser.
    logger.info("🚀 Starting Deep Flux Prompt Optimizer")
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )