# Phramer_AI / analyzer.py
"""
Ultra Supreme Analyzer - Complete Multi-Model Analysis
Integrates multiple specialized models for comprehensive image analysis
"""
import re
import logging
import spaces
import torch
import cv2
import numpy as np
from typing import Dict, List, Any, Tuple, Optional
from PIL import Image
# Deep learning models for specialized analysis
try:
    from deepface import DeepFace
    DEEPFACE_AVAILABLE = True
except ImportError:
    DEEPFACE_AVAILABLE = False
try:
    import mediapipe as mp
    MEDIAPIPE_AVAILABLE = True
except ImportError:
    MEDIAPIPE_AVAILABLE = False
try:
    from transformers import pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False
from constants import (
FORBIDDEN_ELEMENTS, MICRO_AGE_INDICATORS, ULTRA_FACIAL_ANALYSIS,
EMOTION_MICRO_EXPRESSIONS, CULTURAL_RELIGIOUS_ULTRA, CLOTHING_ACCESSORIES_ULTRA,
ENVIRONMENTAL_ULTRA_ANALYSIS, POSE_BODY_LANGUAGE_ULTRA, COMPOSITION_PHOTOGRAPHY_ULTRA,
TECHNICAL_PHOTOGRAPHY_ULTRA, QUALITY_DESCRIPTORS_ULTRA, GENDER_INDICATORS
)
logger = logging.getLogger(__name__)
class UltraSupremeAnalyzer:
"""Complete analyzer with multiple specialized models"""
def __init__(self):
self.face_cascade = None
self.pose_detector = None
self.emotion_classifier = None
self.scene_classifier = None
self.models_initialized = False
def _initialize_models(self):
"""Lazy initialization of models"""
if self.models_initialized:
return
try:
# OpenCV face detector (lightweight)
self.face_cascade = cv2.CascadeClassifier(
cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
# MediaPipe pose detector
if MEDIAPIPE_AVAILABLE:
self.mp_pose = mp.solutions.pose
self.pose_detector = self.mp_pose.Pose(
static_image_mode=True,
min_detection_confidence=0.5
)
# Emotion classifier from transformers
if TRANSFORMERS_AVAILABLE:
self.emotion_classifier = pipeline(
"image-classification",
model="dima806/facial_emotions_image_detection"
)
self.models_initialized = True
logger.info("Additional analysis models initialized")
except Exception as e:
logger.error(f"Error initializing models: {e}")
self.models_initialized = False
@spaces.GPU(duration=30)
def ultra_supreme_analysis(self, image: Any, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
"""Complete analysis using all available models"""
# Initialize models if needed
self._initialize_models()
# Start with CLIP analysis
clip_analysis = self._parse_clip_results(clip_fast, clip_classic, clip_best)
        # Normalize inputs: keep a PIL image for the DL models and a BGR array for OpenCV
        if isinstance(image, Image.Image):
            img_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        else:
            img_bgr = image
            image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# Initialize complete analysis structure
analysis = {
"clip_fast": clip_fast,
"clip_classic": clip_classic,
"clip_best": clip_best,
"full_description": f"{clip_fast} {clip_classic} {clip_best}",
"demographic": {
"age_category": None,
"age_confidence": 0,
"gender": None,
"gender_confidence": 0,
"cultural_religious": []
},
"facial_ultra": {
"eyes": [],
"eyebrows": [],
"nose": [],
"mouth": [],
"facial_hair": [],
"skin": [],
"structure": [],
"face_count": 0,
"face_locations": []
},
"emotional_state": {
"primary_emotion": None,
"emotion_confidence": 0,
"emotion_distribution": {},
"micro_expressions": [],
"overall_demeanor": []
},
"clothing_accessories": {
"headwear": [],
"eyewear": [],
"clothing": [],
"accessories": [],
"style": []
},
"environmental": {
"setting_type": None,
"specific_location": None,
"lighting_analysis": [],
"atmosphere": [],
"objects": []
},
"pose_composition": {
"body_language": [],
"head_position": [],
"eye_contact": [],
"posture": [],
"gesture": [],
"pose_confidence": 0
},
"technical_analysis": {
"shot_type": None,
"angle": None,
"lighting_setup": None,
"composition": [],
"suggested_equipment": {}
},
"intelligence_metrics": {
"total_features_detected": 0,
"analysis_depth_score": 0,
"cultural_awareness_score": 0,
"technical_optimization_score": 0,
"model_confidence_average": 0
}
}
# Merge CLIP analysis
analysis = self._merge_analysis(analysis, clip_analysis)
# Face detection and analysis
        face_analysis = self._analyze_faces(img_bgr, image)
analysis = self._merge_analysis(analysis, face_analysis)
# Pose analysis
if MEDIAPIPE_AVAILABLE:
pose_analysis = self._analyze_pose(image)
analysis = self._merge_analysis(analysis, pose_analysis)
# Emotion analysis
if TRANSFORMERS_AVAILABLE and analysis["facial_ultra"]["face_count"] > 0:
emotion_analysis = self._analyze_emotions(image)
analysis = self._merge_analysis(analysis, emotion_analysis)
        # Scene and environment analysis (uses the merged analysis so the full CLIP description is available)
        scene_analysis = self._analyze_scene(analysis)
analysis = self._merge_analysis(analysis, scene_analysis)
# Calculate intelligence metrics
analysis = self._calculate_intelligence_metrics(analysis)
return analysis
def _parse_clip_results(self, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
"""Parse CLIP results for structured information"""
combined_text = f"{clip_fast} {clip_classic} {clip_best}".lower()
analysis = {
"demographic": {},
"facial_ultra": {},
"emotional_state": {},
"clothing_accessories": {},
"environmental": {},
"pose_composition": {},
"technical_analysis": {}
}
# Gender detection
for gender, indicators in GENDER_INDICATORS.items():
if any(indicator in combined_text for indicator in indicators):
analysis["demographic"]["gender"] = gender
analysis["demographic"]["gender_confidence"] = 0.8
break
# Age detection
for age_category, indicators in MICRO_AGE_INDICATORS.items():
if any(indicator in combined_text for indicator in indicators):
analysis["demographic"]["age_category"] = age_category
analysis["demographic"]["age_confidence"] = 0.7
break
# Facial features
for feature_type, features in ULTRA_FACIAL_ANALYSIS.items():
if isinstance(features, dict):
for sub_type, sub_features in features.items():
found = [f for f in sub_features if f in combined_text]
if found and feature_type in analysis["facial_ultra"]:
analysis["facial_ultra"][feature_type] = found
else:
found = [f for f in features if f in combined_text]
if found:
analysis["facial_ultra"][feature_type] = found
# Emotions
all_emotions = EMOTION_MICRO_EXPRESSIONS["primary_emotions"] + EMOTION_MICRO_EXPRESSIONS["complex_emotions"]
found_emotions = [e for e in all_emotions if e in combined_text]
if found_emotions:
analysis["emotional_state"]["primary_emotion"] = found_emotions[0]
analysis["emotional_state"]["micro_expressions"] = found_emotions
# Environment
for setting_type, settings in ENVIRONMENTAL_ULTRA_ANALYSIS["indoor_settings"].items():
if any(s in combined_text for s in settings):
analysis["environmental"]["setting_type"] = f"indoor_{setting_type}"
break
for setting_type, settings in ENVIRONMENTAL_ULTRA_ANALYSIS["outdoor_settings"].items():
if any(s in combined_text for s in settings):
analysis["environmental"]["setting_type"] = f"outdoor_{setting_type}"
break
# Technical analysis
for shot_type in COMPOSITION_PHOTOGRAPHY_ULTRA["shot_types"]:
if shot_type in combined_text:
analysis["technical_analysis"]["shot_type"] = shot_type
break
return analysis
def _analyze_faces(self, img_bgr: np.ndarray, img_pil: Image.Image) -> Dict[str, Any]:
"""Analyze faces using OpenCV and DeepFace"""
analysis = {"facial_ultra": {}, "demographic": {}, "emotional_state": {}}
        # OpenCV Haar cascade face detection (scaleFactor=1.1, minNeighbors=4)
        if self.face_cascade is None or self.face_cascade.empty():
            logger.warning("Face cascade unavailable, skipping OpenCV face detection")
            return analysis
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4)
        analysis["facial_ultra"]["face_count"] = len(faces)
        analysis["facial_ultra"]["face_locations"] = faces.tolist() if len(faces) > 0 else []
# DeepFace analysis for the first detected face
if DEEPFACE_AVAILABLE and len(faces) > 0:
try:
# Analyze with DeepFace
results = DeepFace.analyze(
img_path=np.array(img_pil),
actions=['age', 'gender', 'emotion', 'race'],
enforce_detection=False,
silent=True
)
if isinstance(results, list):
results = results[0]
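                # Note: DeepFace.analyze returns per-face dicts whose keys include 'age',
                # 'dominant_gender', 'gender' (per-class percentages) and 'emotion'
                # (per-class percentages); the percentages are rescaled to 0-1 below.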
# Extract demographics
analysis["demographic"]["age_category"] = self._age_to_category(results.get('age', 0))
analysis["demographic"]["age_confidence"] = 0.85
analysis["demographic"]["gender"] = results.get('dominant_gender', '').lower()
analysis["demographic"]["gender_confidence"] = results.get('gender', {}).get(
results.get('dominant_gender', ''), 0
) / 100.0
# Extract emotions
emotions = results.get('emotion', {})
if emotions:
sorted_emotions = sorted(emotions.items(), key=lambda x: x[1], reverse=True)
analysis["emotional_state"]["primary_emotion"] = sorted_emotions[0][0]
analysis["emotional_state"]["emotion_confidence"] = sorted_emotions[0][1] / 100.0
analysis["emotional_state"]["emotion_distribution"] = {
k: v/100.0 for k, v in emotions.items()
}
except Exception as e:
logger.warning(f"DeepFace analysis failed: {e}")
return analysis
def _analyze_pose(self, image: Image.Image) -> Dict[str, Any]:
"""Analyze body pose using MediaPipe"""
analysis = {"pose_composition": {}}
if not MEDIAPIPE_AVAILABLE or not self.pose_detector:
return analysis
try:
# Convert PIL to RGB array
image_rgb = np.array(image)
# Process the image
results = self.pose_detector.process(image_rgb)
if results.pose_landmarks:
landmarks = results.pose_landmarks.landmark
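                # MediaPipe pose landmarks are normalized to the [0, 1] image range with y
                # increasing downward, and each landmark carries a visibility score in [0, 1];
                # the thresholds below (0.02 tilt, nose.y < 0.3) are heuristics on those units.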
# Analyze head position
nose = landmarks[self.mp_pose.PoseLandmark.NOSE]
left_eye = landmarks[self.mp_pose.PoseLandmark.LEFT_EYE]
right_eye = landmarks[self.mp_pose.PoseLandmark.RIGHT_EYE]
# Calculate head tilt
eye_diff_y = abs(left_eye.y - right_eye.y)
if eye_diff_y > 0.02:
analysis["pose_composition"]["head_position"] = ["head tilted"]
else:
analysis["pose_composition"]["head_position"] = ["head straight"]
# Analyze posture
left_shoulder = landmarks[self.mp_pose.PoseLandmark.LEFT_SHOULDER]
right_shoulder = landmarks[self.mp_pose.PoseLandmark.RIGHT_SHOULDER]
shoulder_diff_y = abs(left_shoulder.y - right_shoulder.y)
if shoulder_diff_y < 0.02:
analysis["pose_composition"]["posture"] = ["upright posture", "balanced stance"]
else:
analysis["pose_composition"]["posture"] = ["asymmetric posture"]
                # Confidence based on landmark visibility
                visibility_scores = [lm.visibility for lm in landmarks]
                analysis["pose_composition"]["pose_confidence"] = float(np.mean(visibility_scores))
                # Body language interpretation
                if nose.y < 0.3:
                    analysis["pose_composition"].setdefault("body_language", []).append("confident stance")
except Exception as e:
logger.warning(f"Pose analysis failed: {e}")
return analysis
def _analyze_emotions(self, image: Image.Image) -> Dict[str, Any]:
"""Analyze emotions using transformer model"""
analysis = {"emotional_state": {}}
if not TRANSFORMERS_AVAILABLE or not self.emotion_classifier:
return analysis
try:
# Run emotion classification
predictions = self.emotion_classifier(image)
if predictions:
# Sort by confidence
predictions.sort(key=lambda x: x['score'], reverse=True)
# Primary emotion
analysis["emotional_state"]["primary_emotion"] = predictions[0]['label'].lower()
analysis["emotional_state"]["emotion_confidence"] = predictions[0]['score']
# Emotion distribution
analysis["emotional_state"]["emotion_distribution"] = {
pred['label'].lower(): pred['score'] for pred in predictions[:5]
}
# Map to micro-expressions
primary = predictions[0]['label'].lower()
if primary in ['happy', 'joy']:
analysis["emotional_state"]["micro_expressions"] = ["smile", "positive expression"]
elif primary in ['sad', 'sorrow']:
analysis["emotional_state"]["micro_expressions"] = ["downturned mouth", "melancholic"]
elif primary in ['angry', 'disgust']:
analysis["emotional_state"]["micro_expressions"] = ["furrowed brow", "tense jaw"]
elif primary in ['surprise', 'fear']:
analysis["emotional_state"]["micro_expressions"] = ["raised eyebrows", "wide eyes"]
except Exception as e:
logger.warning(f"Emotion analysis failed: {e}")
return analysis
    def _analyze_scene(self, merged_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze scene, lighting and atmosphere from the merged CLIP description"""
        analysis = {"environmental": merged_analysis.get("environmental", {})}
        # Lighting analysis based on the combined CLIP description
        combined_text = merged_analysis.get("full_description", "").lower()
lighting_keywords = {
"natural light": ["sunlight", "daylight", "outdoor", "sunny"],
"artificial light": ["indoor", "lamp", "fluorescent", "led"],
"dramatic lighting": ["dramatic", "moody", "contrast", "shadow"],
"soft lighting": ["soft", "diffused", "gentle", "even"]
}
        for light_type, keywords in lighting_keywords.items():
            if any(keyword in combined_text for keyword in keywords):
                analysis["environmental"].setdefault("lighting_analysis", []).append(light_type)
        # Atmosphere
        if any(word in combined_text for word in ["professional", "formal", "business"]):
            analysis["environmental"].setdefault("atmosphere", []).append("professional")
        if any(word in combined_text for word in ["casual", "relaxed", "informal"]):
            analysis["environmental"].setdefault("atmosphere", []).append("casual")
        if any(word in combined_text for word in ["artistic", "creative", "abstract"]):
            analysis["environmental"].setdefault("atmosphere", []).append("artistic")
return analysis
def _age_to_category(self, age: int) -> str:
"""Convert numeric age to category"""
if age < 2:
return "infant"
elif age < 12:
return "child"
elif age < 20:
return "teen"
elif age < 35:
return "young_adult"
elif age < 50:
return "middle_aged"
elif age < 65:
return "senior"
else:
return "elderly"
def _merge_analysis(self, base: Dict[str, Any], new: Dict[str, Any]) -> Dict[str, Any]:
"""Merge analysis results"""
for key, value in new.items():
if key in base:
if isinstance(value, dict) and isinstance(base[key], dict):
base[key].update(value)
elif isinstance(value, list) and isinstance(base[key], list):
base[key].extend(value)
elif value is not None and (not isinstance(base[key], (int, float)) or base[key] == 0):
base[key] = value
return base
def _calculate_intelligence_metrics(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate intelligence metrics based on analysis completeness"""
metrics = analysis["intelligence_metrics"]
# Count detected features
total_features = 0
confidence_scores = []
# Demographic features
if analysis["demographic"]["age_category"]:
total_features += 1
confidence_scores.append(analysis["demographic"]["age_confidence"])
if analysis["demographic"]["gender"]:
total_features += 1
confidence_scores.append(analysis["demographic"]["gender_confidence"])
# Facial features
for feature in ["eyes", "eyebrows", "nose", "mouth", "facial_hair", "skin", "structure"]:
if analysis["facial_ultra"].get(feature):
total_features += len(analysis["facial_ultra"][feature])
# Emotional features
if analysis["emotional_state"]["primary_emotion"]:
total_features += 1
confidence_scores.append(analysis["emotional_state"]["emotion_confidence"])
# Pose features
if analysis["pose_composition"].get("pose_confidence", 0) > 0:
total_features += 1
confidence_scores.append(analysis["pose_composition"]["pose_confidence"])
# Environmental features
if analysis["environmental"]["setting_type"]:
total_features += 1
total_features += len(analysis["environmental"].get("lighting_analysis", []))
# Technical features
if analysis["technical_analysis"]["shot_type"]:
total_features += 1
# Calculate scores
metrics["total_features_detected"] = total_features
metrics["analysis_depth_score"] = min(100, total_features * 5)
# Cultural awareness (if religious/cultural indicators found)
if analysis["demographic"].get("cultural_religious"):
metrics["cultural_awareness_score"] = 80
else:
metrics["cultural_awareness_score"] = 40
# Technical optimization score
tech_features = sum([
1 if analysis["technical_analysis"]["shot_type"] else 0,
len(analysis["environmental"].get("lighting_analysis", [])),
len(analysis["pose_composition"].get("posture", []))
])
metrics["technical_optimization_score"] = min(100, tech_features * 25)
# Average confidence
if confidence_scores:
metrics["model_confidence_average"] = sum(confidence_scores) / len(confidence_scores)
else:
metrics["model_confidence_average"] = 0.5
return analysis
def build_ultra_supreme_prompt(self, ultra_analysis: Dict[str, Any], clip_results: List[str]) -> str:
"""Build enhanced prompt based on comprehensive analysis"""
prompt_parts = []
        # Start with the primary CLIP caption
        if clip_results:
            prompt_parts.append(clip_results[0])
# Add demographic details if confident
if ultra_analysis["demographic"]["age_category"] and ultra_analysis["demographic"]["age_confidence"] > 0.7:
age_descriptors = QUALITY_DESCRIPTORS_ULTRA["based_on_age"].get(
ultra_analysis["demographic"]["age_category"], []
)
if age_descriptors:
prompt_parts.append(age_descriptors[0])
# Add emotional context
if ultra_analysis["emotional_state"]["primary_emotion"]:
emotion = ultra_analysis["emotional_state"]["primary_emotion"]
emotion_descriptors = QUALITY_DESCRIPTORS_ULTRA["based_on_emotion"].get(emotion, [])
if emotion_descriptors:
prompt_parts.append(f"{emotion_descriptors[0]} expression")
# Add technical details
if ultra_analysis["technical_analysis"]["shot_type"]:
prompt_parts.append(ultra_analysis["technical_analysis"]["shot_type"])
# Add lighting
lighting = ultra_analysis["environmental"].get("lighting_analysis", [])
if lighting:
prompt_parts.append(f"with {lighting[0]}")
# Combine parts
enhanced_prompt = ", ".join(prompt_parts)
# Clean up
enhanced_prompt = re.sub(r'\s+', ' ', enhanced_prompt)
enhanced_prompt = re.sub(r',\s*,+', ',', enhanced_prompt)
return enhanced_prompt
def calculate_ultra_supreme_score(self, prompt: str, ultra_analysis: Dict[str, Any]) -> Tuple[int, Dict[str, int]]:
"""Calculate comprehensive score based on multi-model analysis"""
breakdown = {}
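        # Each component below is capped at 25 points, so the total score is bounded by 100.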
# Base score from prompt quality
breakdown["prompt_quality"] = min(25, len(prompt) // 10)
# Analysis depth score
breakdown["analysis_depth"] = min(25, ultra_analysis["intelligence_metrics"]["analysis_depth_score"] // 4)
# Model confidence score
avg_confidence = ultra_analysis["intelligence_metrics"]["model_confidence_average"]
breakdown["model_confidence"] = int(avg_confidence * 25)
# Feature richness score
total_features = ultra_analysis["intelligence_metrics"]["total_features_detected"]
breakdown["feature_richness"] = min(25, total_features * 2)
total_score = sum(breakdown.values())
return total_score, breakdown
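# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes the
# Hugging Face Spaces runtime (for the `spaces.GPU` decorator), the project's
# `constants` module, and three pre-computed CLIP captions produced elsewhere
# in Phramer_AI; the blank test image and the caption strings are placeholders.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    analyzer = UltraSupremeAnalyzer()
    test_image = Image.new("RGB", (512, 512), color="gray")  # placeholder image
    # Placeholder captions standing in for the fast/classic/best CLIP interrogator outputs
    fast = "a man wearing a suit in an office"
    classic = "a professional portrait of a man in a suit, office background"
    best = "a close-up portrait of a middle aged man in a dark suit, soft indoor lighting"

    result = analyzer.ultra_supreme_analysis(test_image, fast, classic, best)
    prompt = analyzer.build_ultra_supreme_prompt(result, [best, classic, fast])
    score, breakdown = analyzer.calculate_ultra_supreme_score(prompt, result)
    print(prompt)
    print(score, breakdown)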