# Spaces: Running on Zero
""" | |
Configuration file for Phramer AI | |
By Pariente AI, for MIA TV Series | |
Multimodal tool with BAGEL integration and professional photographic prompt optimization | |
""" | |
import os
from typing import Any, Dict

import torch
# Application Configuration
# Static, human-readable metadata about the application: name, version,
# authorship and the short/long blurbs.
APP_CONFIG = {
    "title": "Phramer AI",
    "description": "Multimodal tool that reads images and turns them into refined, photo-realistic prompts. Ready for Midjourney, Flux or any generative engine.",
    "version": "2.0.0",
    "author": "Pariente AI",
    "project": "MIA TV Series",
    "tagline": "By Pariente AI, for MIA TV Series",
    "logline": "Phramer AI is a multimodal tool that reads an image and turns it into a refined, photo-realistic prompt. Ready for Midjourney, Flux or any generative engine.",
}
# BAGEL Model Configuration
# Where the BAGEL checkpoint comes from, where it is stored locally, and the
# default loading / preprocessing / sampling parameters.
BAGEL_CONFIG = {
    "model_repo": "ByteDance-Seed/BAGEL-7B-MoT",
    "local_model_path": "./model",
    "cache_dir": "./model/cache",
    # Glob patterns for the repository files to download.
    "download_patterns": ["*.json", "*.safetensors", "*.bin", "*.py", "*.md", "*.txt"],
    # Model parameters
    "dtype": torch.bfloat16,
    "device_map_strategy": "auto",
    "max_memory_per_gpu": "80GiB",
    "offload_buffers": True,
    "force_hooks": True,
    # Image processing
    # NOTE(review): the meaning of each tuple element is not visible in this
    # file (presumably max size, min size, stride/patch size) - confirm
    # against the code that builds the image transforms.
    "vae_transform_size": (1024, 512, 16),
    "vit_transform_size": (980, 224, 14),
    # Inference parameters
    "max_new_tokens": 512,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
}
# Device Configuration for ZeroGPU
def get_device_config() -> Dict[str, Any]:
    """Determine optimal device configuration for BAGEL.

    Probes PyTorch for an accelerator and reports what was found.

    Returns:
        A dict that always contains ``device`` (``"cpu"``, ``"cuda"`` or
        ``"mps"``), ``use_gpu``, ``gpu_count`` and ``memory_efficient``.
        When CUDA is available it additionally contains ``gpu_memory_gb``
        (total memory of device 0 divided by 1e9) and ``multi_gpu``.
        Those two keys are NOT set on the MPS or CPU paths, so callers
        should read them with ``.get()``.
    """
    # CPU fallback; overwritten below when an accelerator is detected.
    device_config = {
        "device": "cpu",
        "use_gpu": False,
        "gpu_count": 0,
        "memory_efficient": True,
    }

    if torch.cuda.is_available():
        gpu_count = torch.cuda.device_count()
        device_config.update({
            "device": "cuda",
            "use_gpu": True,
            "gpu_count": gpu_count,
            # Only device 0 is inspected, even when several GPUs are present.
            "gpu_memory_gb": torch.cuda.get_device_properties(0).total_memory / 1e9,
            "multi_gpu": gpu_count > 1,
        })
    # hasattr guard: torch builds without an MPS backend lack the attribute.
    elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        device_config.update({
            "device": "mps",
            "use_gpu": True,
            "gpu_count": 1,
        })

    return device_config
# BAGEL Device Mapping Configuration
def get_bagel_device_map(gpu_count: int) -> Dict[str, str]:
    """Configure device mapping for BAGEL model.

    Pins the modules that must share a device to ``"cuda:0"``.

    Args:
        gpu_count: Number of available GPUs. Kept for interface
            compatibility; the single-GPU and multi-GPU cases pin the same
            modules to the same device, so the value does not currently
            change the result.

    Returns:
        A new dict mapping each pinned module name to ``"cuda:0"``, in the
        order listed below.
    """
    # Same device modules that need to be on the same GPU
    same_device_modules = [
        'language_model.model.embed_tokens',
        'time_embedder',
        'latent_pos_embed',
        'vae2llm',
        'llm2vae',
        'connector',
        'vit_pos_embed',
    ]

    # Both the single- and multi-GPU configurations keep these critical
    # modules on the first device, so one mapping covers every gpu_count.
    first_device = "cuda:0"
    return dict.fromkeys(same_device_modules, first_device)
# Processing Configuration
# Limits and defaults for image handling and request processing.
PROCESSING_CONFIG = {
    "max_image_size": 1024,
    "image_quality": 95,
    "supported_formats": [".jpg", ".jpeg", ".png", ".webp"],
    "batch_size": 1,
    "timeout_seconds": 120,  # Increased for BAGEL processing
}
# Prompt Optimization Rules for Multi-Engine Compatibility
# Regex patterns to strip from prompts, plus the text fragments (camera,
# lighting, style, keywords) available for building an optimized prompt.
FLUX_RULES = {
    # Regex patterns for boilerplate tags to remove.
    # NOTE(review): if the consumer applies these in list order, the generic
    # r',\s*\d+k\s*' would match before r',\s*\d+k resolution' and leave
    # "resolution" behind; likewise 'trending on artstation' is already
    # covered by 'trending on [^,]+'. Confirm against the consumer.
    "remove_patterns": [
        r',\s*trending on artstation',
        r',\s*trending on [^,]+',
        r',\s*\d+k\s*',
        r',\s*\d+k resolution',
        r',\s*artstation',
        r',\s*concept art',
        r',\s*digital art',
        r',\s*by greg rutkowski',
    ],
    # Camera/lens suffix per scene type; each starts with ", " so it can be
    # appended directly to an existing prompt.
    "camera_configs": {
        "portrait": ", Shot on Hasselblad X2D 100C, 85mm f/1.4 lens at f/2.8, ISO 200, rule of thirds composition, professional portrait photography",
        "landscape": ", Shot on Phase One XT, 24-70mm f/4 lens at f/8, ISO 100, hyperfocal distance, leading lines composition, epic landscape photography",
        "street": ", Shot on Leica M11, 35mm f/1.4 lens at f/2.8, ISO 800, decisive moment, candid composition, documentary street photography",
        "cinematic": ", Shot on ARRI Alexa LF, 35mm anamorphic lens at f/2.8, ISO 400, cinematic framing, film grain, dramatic composition",
        "architectural": ", Shot on Canon EOS R5, 24-70mm f/2.8 lens at f/8, ISO 100, symmetrical composition, perspective correction, architectural photography",
        "commercial": ", Shot on Hasselblad X2D 100C, 90mm f/2.5 lens at f/4, ISO 100, centered composition, product photography",
        "default": ", Shot on Phase One XF IQ4, 80mm f/2.8 lens at f/4, ISO 200, balanced composition, professional photography",
    },
    # Lighting suffix per lighting mood.
    "lighting_enhancements": {
        "dramatic": ", dramatic key lighting with rim light, chiaroscuro lighting ratio 4:1",
        "portrait": ", soft key light with fill light, 3-point lighting setup, lighting ratio 2:1",
        "cinematic": ", moody cinematic lighting with practical lights, dramatic shadows, film noir lighting",
        "natural": ", natural window lighting, soft diffused light, even lighting ratio",
        "default": ", professional lighting setup, balanced exposure",
    },
    # Style suffix per target look.
    "style_enhancements": {
        "photorealistic": ", photorealistic rendering, ultra-detailed, sharp focus, high resolution",
        "cinematic": ", cinematic composition, film grain, anamorphic bokeh, color grading",
        "commercial": ", commercial photography style, clean composition, product focus",
        "editorial": ", editorial photography style, storytelling composition, magazine quality",
        "fine_art": ", fine art photography, artistic composition, gallery quality",
    },
    # Text condensation patterns
    "condensation_patterns": {
        # Filler phrases (regex) to drop.
        "remove_redundant_phrases": [
            r'This image depicts',
            r'The image shows',
            r'appears to be',
            r'seems to be',
            r'which adds',
            r'The setting appears to be',
            r'The scene is',
            r'The composition is carefully',
            r'The technical photographic qualities include',
            r'The artistic elements include',
        ],
        # (regex pattern, replacement) pairs.
        "simplify_lighting_descriptions": [
            (r'The sun is setting, casting a warm, orange glow over the scene', 'golden hour lighting'),
            (r'soft and warm.*?light', 'soft warm lighting'),
            (r'dramatic contrast with the dark, shadowy areas', 'dramatic shadows'),
            (r'long shadows and creating a sense of depth', 'deep shadows'),
        ],
        # (regex pattern, replacement) pairs.
        "condense_mood_descriptions": [
            (r'The mood is somber and melancholic.*?contrast to the dark, desolate environment', 'melancholic mood with dramatic contrast'),
            (r'sense of contemplation or longing', 'contemplative mood'),
            (r'sense of desolation and abandonment', 'desolate atmosphere'),
        ],
    },
    # Mandatory generative keywords
    "mandatory_keywords": {
        "quality": ["photorealistic", "ultra-detailed", "sharp focus"],
        "technical": ["professional photography", "high resolution"],
        "style_by_scene": {
            "cinematic": ["cinematic composition", "film grain"],
            "portrait": ["shallow depth of field", "bokeh"],
            "landscape": ["wide shot", "epic scale"],
            "street": ["candid moment", "documentary style"],
            "architectural": ["geometric composition", "clean lines"],
        },
    },
}
# Enhanced Scoring Configuration with Professional Photography Criteria
# Weights, grade bands and keyword lists for scoring a prompt.
SCORING_CONFIG = {
    "max_score": 100,
    # Four equally weighted components; the weights sum to 1.0.
    "score_weights": {
        "prompt_quality": 0.25,
        "technical_details": 0.25,
        "professional_photography": 0.25,
        "multi_engine_optimization": 0.25,
    },
    # Listed from highest to lowest.
    # NOTE(review): each key is presumably the minimum score for that grade -
    # confirm against the scoring code, which is not visible here.
    "grade_thresholds": {
        95: {"grade": "LEGENDARY", "color": "#059669"},
        90: {"grade": "EXCELLENT", "color": "#10b981"},
        85: {"grade": "VERY GOOD", "color": "#22c55e"},
        75: {"grade": "GOOD", "color": "#84cc16"},
        65: {"grade": "FAIR", "color": "#f59e0b"},
        50: {"grade": "NEEDS WORK", "color": "#f97316"},
        0: {"grade": "POOR", "color": "#ef4444"},
    },
    # Keyword lists grouped by professional-photography criterion.
    "professional_criteria": {
        "camera_equipment": ["Canon", "Sony", "Leica", "Hasselblad", "Phase One", "ARRI"],
        "lens_specifications": ["f/", "mm", "anamorphic", "telephoto", "wide-angle"],
        "technical_settings": ["ISO", "aperture", "f/", "shutter speed", "exposure"],
        "lighting_techniques": ["key light", "rim light", "fill light", "3-point lighting", "lighting ratio"],
        "composition_rules": ["rule of thirds", "leading lines", "depth of field", "bokeh", "symmetrical", "centered"],
        "cinematography_terms": ["cinematic", "dramatic", "film grain", "anamorphic bokeh", "color grading"],
    },
}
# Environment Configuration
# Snapshot of environment variables, taken once at import time. The
# SPACE_ID variable being set is what marks the process as running on
# Spaces rather than locally.
ENVIRONMENT = {
    "is_spaces": os.getenv("SPACE_ID") is not None,
    "is_local": os.getenv("SPACE_ID") is None,
    "log_level": os.getenv("LOG_LEVEL", "INFO"),
    # Only the case-insensitive string "true" enables debug mode.
    "debug_mode": os.getenv("DEBUG", "false").lower() == "true",
    "space_id": os.getenv("SPACE_ID", ""),
    "space_author": os.getenv("SPACE_AUTHOR_NAME", ""),
}
# Enhanced BAGEL Prompts for Professional Analysis
# Instruction templates sent to the model. Each asks for two sections,
# DESCRIPTION and CAMERA_SETUP. The text is runtime data: continuation lines
# sit at column 0 on purpose so no indentation leaks into the prompt.
BAGEL_PROMPTS = {
    "multimodal_analysis": """Analyze this image for professional prompt generation. Provide exactly two sections:
1. DESCRIPTION: Create a detailed, flowing paragraph describing the image, including:
- Image type (photograph, illustration, artwork, scene)
- Subject matter and composition elements
- Color palette, lighting conditions, and mood
- Visual style, artistic elements, and photographic techniques
- Any cinematic or dramatic qualities
2. CAMERA_SETUP: Recommend professional camera and lens setup based on scene analysis:
- For portraits: High-end camera with portrait lens (85mm-135mm f/1.4-f/2.8)
- For landscapes: Medium format with wide lens (24-40mm f/4-f/8)
- For street/documentary: Compact system with standard lens (35mm-50mm f/1.4-f/2.8)
- For cinematic scenes: Cinema camera with appropriate lens for mood
- Include specific aperture, focal length, and shooting considerations
Focus on creating prompts suitable for multiple generative engines (Flux, Midjourney, etc.).""",
    "flux_optimization": """Analyze this image for FLUX prompt generation with professional photography principles:
1. DESCRIPTION: Detailed scene description focusing on:
- Visual composition and subject placement
- Lighting quality, direction, and mood
- Color relationships and tonal values
- Artistic style and photographic approach
- Technical qualities that enhance realism
2. CAMERA_SETUP: Professional equipment recommendation:
- Camera body suitable for the scene type
- Lens choice with specific focal length and aperture
- Lighting setup considerations
- Technical settings for optimal results
Generate content optimized for photorealistic output.""",
    "cinematic_analysis": """Analyze this image for cinematic prompt creation:
1. DESCRIPTION: Focus on cinematic elements:
- Scene composition and framing
- Lighting mood and dramatic elements
- Color grading and visual atmosphere
- Character positioning and environmental context
- Storytelling visual cues
2. CAMERA_SETUP: Cinema-grade equipment:
- Professional cinema camera recommendation
- Lens choice for cinematic effect
- Lighting setup for mood and story
- Technical considerations for film-quality output
Optimize for high-end generative engines with cinematic capabilities.""",
}
# Professional Photography Integration Settings
# Feature toggles, scene-detection keyword lists and prompt-optimization
# limits for the professional photography layer.
PROFESSIONAL_PHOTOGRAPHY_CONFIG = {
    "enable_expert_analysis": True,
    "knowledge_base_integration": True,
    "technical_enhancement": True,
    "composition_guidance": True,
    "prompt_condensation": True,
    # Keywords associated with each scene type.
    "scene_detection_keywords": {
        "portrait": ["person", "face", "portrait", "headshot", "model"],
        "landscape": ["landscape", "nature", "mountain", "sky", "horizon"],
        "street": ["street", "urban", "city", "documentary", "candid"],
        "architectural": ["building", "architecture", "structure", "interior"],
        "cinematic": ["film", "movie", "cinematic", "dramatic", "story"],
        "commercial": ["product", "commercial", "advertising", "brand"],
    },
    "enhancement_priorities": [
        "technical_accuracy",
        "professional_terminology",
        "equipment_specifications",
        "lighting_description",
        "composition_analysis",
        "prompt_optimization",
    ],
    # Prompt optimization settings
    "prompt_optimization": {
        # NOTE(review): the unit (words, tokens or characters) is not
        # visible in this file - confirm against the consumer.
        "max_length": 150,
        "remove_redundancy": True,
        "add_technical_specs": True,
        "include_composition_terms": True,
        "mandatory_iso_settings": True,
        "enforce_aperture_format": True,
    },
}
# Flash Attention Installation Command
# Command string, extra environment variables and shell flag for installing
# flash-attn.
# NOTE(review): "shell": True alongside a single command string suggests a
# subprocess call run through a shell; the consumer is not visible here.
# Keep the command a fixed constant - never interpolate external input.
FLASH_ATTN_INSTALL = {
    "command": "pip install flash-attn --no-build-isolation",
    "env": {"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    "shell": True,
}
# Export main configurations
# Public API of this module: the names bound by `from <module> import *`.
__all__ = [
    "APP_CONFIG",
    "BAGEL_CONFIG",
    "get_device_config",
    "get_bagel_device_map",
    "PROCESSING_CONFIG",
    "FLUX_RULES",
    "SCORING_CONFIG",
    "ENVIRONMENT",
    "BAGEL_PROMPTS",
    "PROFESSIONAL_PHOTOGRAPHY_CONFIG",
    "FLASH_ATTN_INSTALL",
]