""" | |
Ultra Supreme Analyzer - Complete Multi-Model Analysis | |
Integrates multiple specialized models for comprehensive image analysis | |
""" | |
import re
import logging

import spaces
import torch
import cv2
import numpy as np
from typing import Dict, List, Any, Tuple, Optional
from PIL import Image
# Deep learning models for specialized analysis
try:
    from deepface import DeepFace
    DEEPFACE_AVAILABLE = True
except Exception:
    DEEPFACE_AVAILABLE = False

try:
    import mediapipe as mp
    MEDIAPIPE_AVAILABLE = True
except Exception:
    MEDIAPIPE_AVAILABLE = False

try:
    from transformers import pipeline
    TRANSFORMERS_AVAILABLE = True
except Exception:
    TRANSFORMERS_AVAILABLE = False
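
# The availability flags above let the analyzer degrade gracefully: each
# optional model is only wired in when its package imported successfully,
# so the module still loads on minimal environments.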
from constants import (
    FORBIDDEN_ELEMENTS, MICRO_AGE_INDICATORS, ULTRA_FACIAL_ANALYSIS,
    EMOTION_MICRO_EXPRESSIONS, CULTURAL_RELIGIOUS_ULTRA, CLOTHING_ACCESSORIES_ULTRA,
    ENVIRONMENTAL_ULTRA_ANALYSIS, POSE_BODY_LANGUAGE_ULTRA, COMPOSITION_PHOTOGRAPHY_ULTRA,
    TECHNICAL_PHOTOGRAPHY_ULTRA, QUALITY_DESCRIPTORS_ULTRA, GENDER_INDICATORS
)

logger = logging.getLogger(__name__)

class UltraSupremeAnalyzer:
    """Complete analyzer with multiple specialized models"""

    def __init__(self):
        self.face_cascade = None
        self.pose_detector = None
        self.emotion_classifier = None
        self.scene_classifier = None
        self.models_initialized = False

    def _initialize_models(self):
        """Lazy initialization of models"""
        if self.models_initialized:
            return

        try:
            # OpenCV face detector (lightweight)
            self.face_cascade = cv2.CascadeClassifier(
                cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
            )

            # MediaPipe pose detector
            if MEDIAPIPE_AVAILABLE:
                self.mp_pose = mp.solutions.pose
                self.pose_detector = self.mp_pose.Pose(
                    static_image_mode=True,
                    min_detection_confidence=0.5
                )

            # Emotion classifier from transformers
            if TRANSFORMERS_AVAILABLE:
                self.emotion_classifier = pipeline(
                    "image-classification",
                    model="dima806/facial_emotions_image_detection"
                )

            self.models_initialized = True
            logger.info("Additional analysis models initialized")
        except Exception as e:
            logger.error(f"Error initializing models: {e}")
            self.models_initialized = False
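
    # Lazy initialization keeps the heavy model loading out of __init__, so
    # constructing the analyzer stays cheap; models are only loaded on the
    # first call to ultra_supreme_analysis.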

    def ultra_supreme_analysis(self, image: Any, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
        """Complete analysis using all available models"""
        # Initialize models if needed
        self._initialize_models()

        # Start with CLIP analysis
        clip_analysis = self._parse_clip_results(clip_fast, clip_classic, clip_best)

        # Convert image for processing: keep a PIL (RGB) copy and a BGR array for OpenCV
        if isinstance(image, Image.Image):
            img_array = np.array(image)
            img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        else:
            img_bgr = image
            image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Initialize complete analysis structure
        analysis = {
            "clip_fast": clip_fast,
            "clip_classic": clip_classic,
            "clip_best": clip_best,
            "full_description": f"{clip_fast} {clip_classic} {clip_best}",
            "demographic": {
                "age_category": None,
                "age_confidence": 0,
                "gender": None,
                "gender_confidence": 0,
                "cultural_religious": []
            },
            "facial_ultra": {
                "eyes": [],
                "eyebrows": [],
                "nose": [],
                "mouth": [],
                "facial_hair": [],
                "skin": [],
                "structure": [],
                "face_count": 0,
                "face_locations": []
            },
            "emotional_state": {
                "primary_emotion": None,
                "emotion_confidence": 0,
                "emotion_distribution": {},
                "micro_expressions": [],
                "overall_demeanor": []
            },
            "clothing_accessories": {
                "headwear": [],
                "eyewear": [],
                "clothing": [],
                "accessories": [],
                "style": []
            },
            "environmental": {
                "setting_type": None,
                "specific_location": None,
                "lighting_analysis": [],
                "atmosphere": [],
                "objects": []
            },
            "pose_composition": {
                "body_language": [],
                "head_position": [],
                "eye_contact": [],
                "posture": [],
                "gesture": [],
                "pose_confidence": 0
            },
            "technical_analysis": {
                "shot_type": None,
                "angle": None,
                "lighting_setup": None,
                "composition": [],
                "suggested_equipment": {}
            },
            "intelligence_metrics": {
                "total_features_detected": 0,
                "analysis_depth_score": 0,
                "cultural_awareness_score": 0,
                "technical_optimization_score": 0,
                "model_confidence_average": 0
            }
        }

        # Merge CLIP analysis
        analysis = self._merge_analysis(analysis, clip_analysis)

        # Face detection and analysis
        face_analysis = self._analyze_faces(img_bgr, image)
        analysis = self._merge_analysis(analysis, face_analysis)

        # Pose analysis
        if MEDIAPIPE_AVAILABLE:
            pose_analysis = self._analyze_pose(image)
            analysis = self._merge_analysis(analysis, pose_analysis)

        # Emotion analysis
        if TRANSFORMERS_AVAILABLE and analysis["facial_ultra"]["face_count"] > 0:
            emotion_analysis = self._analyze_emotions(image)
            analysis = self._merge_analysis(analysis, emotion_analysis)

        # Scene and environment analysis
        scene_analysis = self._analyze_scene(clip_analysis)
        analysis = self._merge_analysis(analysis, scene_analysis)

        # Calculate intelligence metrics
        analysis = self._calculate_intelligence_metrics(analysis)

        return analysis

    def _parse_clip_results(self, clip_fast: str, clip_classic: str, clip_best: str) -> Dict[str, Any]:
        """Parse CLIP results for structured information"""
        combined_text = f"{clip_fast} {clip_classic} {clip_best}".lower()

        analysis = {
            # Keep the combined text so _analyze_scene can reuse it for lighting/atmosphere keywords
            "combined_text": combined_text,
            "demographic": {},
            "facial_ultra": {},
            "emotional_state": {},
            "clothing_accessories": {},
            "environmental": {},
            "pose_composition": {},
            "technical_analysis": {}
        }

        # Gender detection
        for gender, indicators in GENDER_INDICATORS.items():
            if any(indicator in combined_text for indicator in indicators):
                analysis["demographic"]["gender"] = gender
                analysis["demographic"]["gender_confidence"] = 0.8
                break

        # Age detection
        for age_category, indicators in MICRO_AGE_INDICATORS.items():
            if any(indicator in combined_text for indicator in indicators):
                analysis["demographic"]["age_category"] = age_category
                analysis["demographic"]["age_confidence"] = 0.7
                break

        # Facial features
        for feature_type, features in ULTRA_FACIAL_ANALYSIS.items():
            if isinstance(features, dict):
                # Accumulate matches across all sub-categories of this feature type
                for sub_type, sub_features in features.items():
                    found = [f for f in sub_features if f in combined_text]
                    if found:
                        analysis["facial_ultra"].setdefault(feature_type, []).extend(found)
            else:
                found = [f for f in features if f in combined_text]
                if found:
                    analysis["facial_ultra"][feature_type] = found

        # Emotions
        all_emotions = EMOTION_MICRO_EXPRESSIONS["primary_emotions"] + EMOTION_MICRO_EXPRESSIONS["complex_emotions"]
        found_emotions = [e for e in all_emotions if e in combined_text]
        if found_emotions:
            analysis["emotional_state"]["primary_emotion"] = found_emotions[0]
            analysis["emotional_state"]["micro_expressions"] = found_emotions

        # Environment
        for setting_type, settings in ENVIRONMENTAL_ULTRA_ANALYSIS["indoor_settings"].items():
            if any(s in combined_text for s in settings):
                analysis["environmental"]["setting_type"] = f"indoor_{setting_type}"
                break
        for setting_type, settings in ENVIRONMENTAL_ULTRA_ANALYSIS["outdoor_settings"].items():
            if any(s in combined_text for s in settings):
                analysis["environmental"]["setting_type"] = f"outdoor_{setting_type}"
                break

        # Technical analysis
        for shot_type in COMPOSITION_PHOTOGRAPHY_ULTRA["shot_types"]:
            if shot_type in combined_text:
                analysis["technical_analysis"]["shot_type"] = shot_type
                break

        return analysis

    def _analyze_faces(self, img_bgr: np.ndarray, img_pil: Image.Image) -> Dict[str, Any]:
        """Analyze faces using OpenCV and DeepFace"""
        analysis = {"facial_ultra": {}, "demographic": {}, "emotional_state": {}}

        # OpenCV face detection (guard against a cascade that failed to load)
        faces = []
        if self.face_cascade is not None and not self.face_cascade.empty():
            gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
            faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)
        analysis["facial_ultra"]["face_count"] = len(faces)
        analysis["facial_ultra"]["face_locations"] = faces.tolist() if len(faces) > 0 else []

        # DeepFace analysis for the first detected face
        if DEEPFACE_AVAILABLE and len(faces) > 0:
            try:
                # Analyze with DeepFace
                results = DeepFace.analyze(
                    img_path=np.array(img_pil),
                    actions=['age', 'gender', 'emotion', 'race'],
                    enforce_detection=False,
                    silent=True
                )
                if isinstance(results, list):
                    results = results[0]

                # Extract demographics
                analysis["demographic"]["age_category"] = self._age_to_category(results.get('age', 0))
                analysis["demographic"]["age_confidence"] = 0.85
                analysis["demographic"]["gender"] = results.get('dominant_gender', '').lower()
                analysis["demographic"]["gender_confidence"] = results.get('gender', {}).get(
                    results.get('dominant_gender', ''), 0
                ) / 100.0

                # Extract emotions
                emotions = results.get('emotion', {})
                if emotions:
                    sorted_emotions = sorted(emotions.items(), key=lambda x: x[1], reverse=True)
                    analysis["emotional_state"]["primary_emotion"] = sorted_emotions[0][0]
                    analysis["emotional_state"]["emotion_confidence"] = sorted_emotions[0][1] / 100.0
                    analysis["emotional_state"]["emotion_distribution"] = {
                        k: v / 100.0 for k, v in emotions.items()
                    }
            except Exception as e:
                logger.warning(f"DeepFace analysis failed: {e}")

        return analysis
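
    # Note: recent DeepFace versions return a list with one result dict per
    # detected face, and the scores under 'gender' and 'emotion' are
    # percentages, which is why the values above are divided by 100. Exact
    # keys can vary between DeepFace versions.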

    def _analyze_pose(self, image: Image.Image) -> Dict[str, Any]:
        """Analyze body pose using MediaPipe"""
        analysis = {"pose_composition": {}}

        if not MEDIAPIPE_AVAILABLE or not self.pose_detector:
            return analysis

        try:
            # Convert PIL to RGB array
            image_rgb = np.array(image)

            # Process the image
            results = self.pose_detector.process(image_rgb)

            if results.pose_landmarks:
                landmarks = results.pose_landmarks.landmark

                # Analyze head position
                nose = landmarks[self.mp_pose.PoseLandmark.NOSE]
                left_eye = landmarks[self.mp_pose.PoseLandmark.LEFT_EYE]
                right_eye = landmarks[self.mp_pose.PoseLandmark.RIGHT_EYE]

                # Calculate head tilt
                eye_diff_y = abs(left_eye.y - right_eye.y)
                if eye_diff_y > 0.02:
                    analysis["pose_composition"]["head_position"] = ["head tilted"]
                else:
                    analysis["pose_composition"]["head_position"] = ["head straight"]

                # Analyze posture
                left_shoulder = landmarks[self.mp_pose.PoseLandmark.LEFT_SHOULDER]
                right_shoulder = landmarks[self.mp_pose.PoseLandmark.RIGHT_SHOULDER]
                shoulder_diff_y = abs(left_shoulder.y - right_shoulder.y)
                if shoulder_diff_y < 0.02:
                    analysis["pose_composition"]["posture"] = ["upright posture", "balanced stance"]
                else:
                    analysis["pose_composition"]["posture"] = ["asymmetric posture"]

                # Confidence based on visibility
                visibility_scores = [l.visibility for l in landmarks]
                analysis["pose_composition"]["pose_confidence"] = np.mean(visibility_scores)

                # Body language interpretation
                if nose.y < 0.3:
                    analysis["pose_composition"].setdefault("body_language", []).append("confident stance")
        except Exception as e:
            logger.warning(f"Pose analysis failed: {e}")

        return analysis
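
    # MediaPipe pose landmarks are normalized to [0, 1] relative to image width
    # and height and carry a per-landmark visibility score, which is why the
    # head-tilt and shoulder checks above compare coordinate differences against
    # small thresholds such as 0.02.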

    def _analyze_emotions(self, image: Image.Image) -> Dict[str, Any]:
        """Analyze emotions using transformer model"""
        analysis = {"emotional_state": {}}

        if not TRANSFORMERS_AVAILABLE or not self.emotion_classifier:
            return analysis

        try:
            # Run emotion classification
            predictions = self.emotion_classifier(image)

            if predictions:
                # Sort by confidence
                predictions.sort(key=lambda x: x['score'], reverse=True)

                # Primary emotion
                analysis["emotional_state"]["primary_emotion"] = predictions[0]['label'].lower()
                analysis["emotional_state"]["emotion_confidence"] = predictions[0]['score']

                # Emotion distribution
                analysis["emotional_state"]["emotion_distribution"] = {
                    pred['label'].lower(): pred['score'] for pred in predictions[:5]
                }

                # Map to micro-expressions
                primary = predictions[0]['label'].lower()
                if primary in ['happy', 'joy']:
                    analysis["emotional_state"]["micro_expressions"] = ["smile", "positive expression"]
                elif primary in ['sad', 'sorrow']:
                    analysis["emotional_state"]["micro_expressions"] = ["downturned mouth", "melancholic"]
                elif primary in ['angry', 'disgust']:
                    analysis["emotional_state"]["micro_expressions"] = ["furrowed brow", "tense jaw"]
                elif primary in ['surprise', 'fear']:
                    analysis["emotional_state"]["micro_expressions"] = ["raised eyebrows", "wide eyes"]
        except Exception as e:
            logger.warning(f"Emotion analysis failed: {e}")

        return analysis

    def _analyze_scene(self, clip_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze scene and environment from CLIP results"""
        analysis = {"environmental": clip_analysis.get("environmental", {})}

        # Lighting analysis based on the combined CLIP description
        combined_text = clip_analysis.get("combined_text", "").lower()

        lighting_keywords = {
            "natural light": ["sunlight", "daylight", "outdoor", "sunny"],
            "artificial light": ["indoor", "lamp", "fluorescent", "led"],
            "dramatic lighting": ["dramatic", "moody", "contrast", "shadow"],
            "soft lighting": ["soft", "diffused", "gentle", "even"]
        }
        for light_type, keywords in lighting_keywords.items():
            if any(keyword in combined_text for keyword in keywords):
                analysis["environmental"].setdefault("lighting_analysis", []).append(light_type)

        # Atmosphere
        if any(word in combined_text for word in ["professional", "formal", "business"]):
            analysis["environmental"].setdefault("atmosphere", []).append("professional")
        if any(word in combined_text for word in ["casual", "relaxed", "informal"]):
            analysis["environmental"].setdefault("atmosphere", []).append("casual")
        if any(word in combined_text for word in ["artistic", "creative", "abstract"]):
            analysis["environmental"].setdefault("atmosphere", []).append("artistic")

        return analysis

    def _age_to_category(self, age: int) -> str:
        """Convert numeric age to category"""
        if age < 2:
            return "infant"
        elif age < 12:
            return "child"
        elif age < 20:
            return "teen"
        elif age < 35:
            return "young_adult"
        elif age < 50:
            return "middle_aged"
        elif age < 65:
            return "senior"
        else:
            return "elderly"

    def _merge_analysis(self, base: Dict[str, Any], new: Dict[str, Any]) -> Dict[str, Any]:
        """Merge analysis results"""
        for key, value in new.items():
            if key in base:
                if isinstance(value, dict) and isinstance(base[key], dict):
                    base[key].update(value)
                elif isinstance(value, list) and isinstance(base[key], list):
                    base[key].extend(value)
                elif value is not None and (not isinstance(base[key], (int, float)) or base[key] == 0):
                    base[key] = value
        return base
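
    # Illustrative sketch of the merge semantics (hypothetical values, not from
    # a real analysis): nested dicts are shallow-updated, lists are extended,
    # non-zero numbers already present in `base` are kept, and other values are
    # replaced by any non-None value from `new`.
    #
    #   base = {"demographic": {"gender": None, "gender_confidence": 0}}
    #   new  = {"demographic": {"gender": "male", "gender_confidence": 0.8}}
    #   _merge_analysis(base, new)
    #   # -> {"demographic": {"gender": "male", "gender_confidence": 0.8}}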

    def _calculate_intelligence_metrics(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Calculate intelligence metrics based on analysis completeness"""
        metrics = analysis["intelligence_metrics"]

        # Count detected features
        total_features = 0
        confidence_scores = []

        # Demographic features
        if analysis["demographic"]["age_category"]:
            total_features += 1
            confidence_scores.append(analysis["demographic"]["age_confidence"])
        if analysis["demographic"]["gender"]:
            total_features += 1
            confidence_scores.append(analysis["demographic"]["gender_confidence"])

        # Facial features
        for feature in ["eyes", "eyebrows", "nose", "mouth", "facial_hair", "skin", "structure"]:
            if analysis["facial_ultra"].get(feature):
                total_features += len(analysis["facial_ultra"][feature])

        # Emotional features
        if analysis["emotional_state"]["primary_emotion"]:
            total_features += 1
            confidence_scores.append(analysis["emotional_state"]["emotion_confidence"])

        # Pose features
        if analysis["pose_composition"].get("pose_confidence", 0) > 0:
            total_features += 1
            confidence_scores.append(analysis["pose_composition"]["pose_confidence"])

        # Environmental features
        if analysis["environmental"]["setting_type"]:
            total_features += 1
        total_features += len(analysis["environmental"].get("lighting_analysis", []))

        # Technical features
        if analysis["technical_analysis"]["shot_type"]:
            total_features += 1

        # Calculate scores
        metrics["total_features_detected"] = total_features
        metrics["analysis_depth_score"] = min(100, total_features * 5)

        # Cultural awareness (if religious/cultural indicators found)
        if analysis["demographic"].get("cultural_religious"):
            metrics["cultural_awareness_score"] = 80
        else:
            metrics["cultural_awareness_score"] = 40

        # Technical optimization score
        tech_features = sum([
            1 if analysis["technical_analysis"]["shot_type"] else 0,
            len(analysis["environmental"].get("lighting_analysis", [])),
            len(analysis["pose_composition"].get("posture", []))
        ])
        metrics["technical_optimization_score"] = min(100, tech_features * 25)

        # Average confidence
        if confidence_scores:
            metrics["model_confidence_average"] = sum(confidence_scores) / len(confidence_scores)
        else:
            metrics["model_confidence_average"] = 0.5

        return analysis
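
    # Worked example of the scoring arithmetic (hypothetical counts): with 12
    # detected features the depth score is min(100, 12 * 5) = 60; with one shot
    # type, two lighting entries and one posture entry, the technical
    # optimization score is min(100, (1 + 2 + 1) * 25) = 100.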

    def build_ultra_supreme_prompt(self, ultra_analysis: Dict[str, Any], clip_results: List[str]) -> str:
        """Build enhanced prompt based on comprehensive analysis"""
        prompt_parts = []

        # Start with the best CLIP result
        if clip_results:
            prompt_parts.append(clip_results[0])

        # Add demographic details if confident
        if ultra_analysis["demographic"]["age_category"] and ultra_analysis["demographic"]["age_confidence"] > 0.7:
            age_descriptors = QUALITY_DESCRIPTORS_ULTRA["based_on_age"].get(
                ultra_analysis["demographic"]["age_category"], []
            )
            if age_descriptors:
                prompt_parts.append(age_descriptors[0])

        # Add emotional context
        if ultra_analysis["emotional_state"]["primary_emotion"]:
            emotion = ultra_analysis["emotional_state"]["primary_emotion"]
            emotion_descriptors = QUALITY_DESCRIPTORS_ULTRA["based_on_emotion"].get(emotion, [])
            if emotion_descriptors:
                prompt_parts.append(f"{emotion_descriptors[0]} expression")

        # Add technical details
        if ultra_analysis["technical_analysis"]["shot_type"]:
            prompt_parts.append(ultra_analysis["technical_analysis"]["shot_type"])

        # Add lighting
        lighting = ultra_analysis["environmental"].get("lighting_analysis", [])
        if lighting:
            prompt_parts.append(f"with {lighting[0]}")

        # Combine parts
        enhanced_prompt = ", ".join(prompt_parts)

        # Clean up
        enhanced_prompt = re.sub(r'\s+', ' ', enhanced_prompt)
        enhanced_prompt = re.sub(r',\s*,+', ',', enhanced_prompt)

        return enhanced_prompt

    def calculate_ultra_supreme_score(self, prompt: str, ultra_analysis: Dict[str, Any]) -> Tuple[int, Dict[str, int]]:
        """Calculate comprehensive score based on multi-model analysis"""
        breakdown = {}

        # Base score from prompt quality
        breakdown["prompt_quality"] = min(25, len(prompt) // 10)

        # Analysis depth score
        breakdown["analysis_depth"] = min(25, ultra_analysis["intelligence_metrics"]["analysis_depth_score"] // 4)

        # Model confidence score
        avg_confidence = ultra_analysis["intelligence_metrics"]["model_confidence_average"]
        breakdown["model_confidence"] = int(avg_confidence * 25)

        # Feature richness score
        total_features = ultra_analysis["intelligence_metrics"]["total_features_detected"]
        breakdown["feature_richness"] = min(25, total_features * 2)

        total_score = sum(breakdown.values())
        return total_score, breakdown
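

# Minimal usage sketch (an illustrative assumption, not part of the original
# module): the image path and the three CLIP caption strings are placeholders;
# in the Space they would come from the upstream CLIP interrogation step.
if __name__ == "__main__":
    analyzer = UltraSupremeAnalyzer()
    example_image = Image.open("example.jpg")  # hypothetical local image

    result = analyzer.ultra_supreme_analysis(
        example_image,
        clip_fast="a man in a suit",
        clip_classic="a portrait of a man in a dark suit, studio lighting",
        clip_best="a close-up portrait of a middle aged man in a dark suit, soft dramatic lighting",
    )
    prompt = analyzer.build_ultra_supreme_prompt(
        result, [result["clip_best"], result["clip_classic"], result["clip_fast"]]
    )
    score, breakdown = analyzer.calculate_ultra_supreme_score(prompt, result)

    print(prompt)
    print(score, breakdown)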