Spaces:

Segizu
/

Face_Recognition

Build error

App Files Files Community

Face_Recognition / app.py

Segizu

metadata v12

752c0fd 2 months ago

raw

history blame

5.46 kB

	import numpy as np
	from PIL import Image, UnidentifiedImageError
	import gradio as gr
	from deepface import DeepFace
	from datasets import load_dataset, Image as HfImage
	import os
	import pickle
	from pathlib import Path
	import gc
	import requests
	from io import BytesIO

	# 🔑 Authentication token: start-up is aborted when HF_TOKEN is unset or empty.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	if not HF_TOKEN:
	    raise ValueError("⚠️ Por favor, configura la variable de entorno HF_TOKEN para acceder al dataset privado")

	# 📁 Directory (created on demand) and pickle file that hold the embeddings
	EMBEDDINGS_DIR = Path("embeddings")
	EMBEDDINGS_DIR.mkdir(exist_ok=True)
	EMBEDDINGS_FILE = EMBEDDINGS_DIR.joinpath("embeddings.pkl")

	# ✅ Load the dataset from metadata.csv (with absolute URLs)
	dataset = load_dataset("csv", data_files="metadata.csv")

	# 🔄 Preprocess an image for Facenet
	def preprocess_image(img: Image.Image) -> np.ndarray:
	    """Convert ``img`` to RGB, resize it to 160x160 and return it as an array.

	    Args:
	        img: PIL image in any mode.

	    Returns:
	        The pixels of the RGB image after a LANCZOS resize to 160x160,
	        wrapped with ``np.array``.
	    """
	    # NOTE(review): DeepFace documents ndarray inputs as BGR, while this
	    # returns RGB channel order -- confirm whether a swap is intended.
	    target_size = (160, 160)
	    resized = img.convert("RGB").resize(target_size, Image.Resampling.LANCZOS)
	    return np.array(resized)

	# 📦 Build the embeddings database
	def build_database():
	    """Return the face-embedding database, computing it when no cache exists.

	    If ``EMBEDDINGS_FILE`` exists it is unpickled and returned unchanged.
	    Otherwise every row of ``dataset["train"]`` is visited: the URL stored in
	    its ``"image"`` column is downloaded, converted to RGB, passed through
	    ``preprocess_image`` and embedded with DeepFace's ``"Facenet"`` model.
	    The accumulated database is pickled to ``EMBEDDINGS_FILE`` after each
	    batch of 10 rows.

	    Returns:
	        list: ``(name, image, embedding)`` tuples where ``name`` is
	        ``"image_<row index>"``, ``image`` is the downloaded RGB PIL image
	        and ``embedding`` is the value DeepFace reports under
	        ``"embedding"``. Rows that fail at any step are logged and skipped.
	    """
	    import traceback

	    if EMBEDDINGS_FILE.exists():
	        print("📂 Cargando embeddings desde el archivo...")
	        # SECURITY: unpickling runs arbitrary code, so this file must only
	        # ever be one written by this application.
	        # NOTE: a checkpoint left by an interrupted run is returned as-is,
	        # i.e. it may cover only part of the dataset.
	        with open(EMBEDDINGS_FILE, 'rb') as f:
	            return pickle.load(f)

	    print("🔄 Calculando embeddings (esto puede tomar unos minutos)...")
	    database = []
	    batch_size = 10

	    # Get the train split
	    train_dataset = dataset["train"]
	    total = len(train_dataset)
	    total_batches = (total + batch_size - 1) // batch_size

	    # Debug: print dataset structure (row 0 is only read when it exists)
	    print("Dataset structure:", train_dataset.features)
	    print("Dataset type:", type(train_dataset))
	    if total:
	        first_item = train_dataset[0]
	        print("First item structure:", first_item)
	        print("Dataset item type:", type(first_item))

	    # Write checkpoints to a sibling temp file and rename it into place, so
	    # an interrupted write never leaves a truncated pickle behind.
	    checkpoint_tmp = EMBEDDINGS_FILE.with_name(EMBEDDINGS_FILE.name + ".tmp")

	    for start in range(0, total, batch_size):
	        stop = min(start + batch_size, total)
	        print(f"📦 Procesando lote {start // batch_size + 1}/{total_batches}")

	        # Rows are fetched one index at a time: slicing a Dataset yields a
	        # dict of column lists, so iterating a slice would walk the column
	        # names instead of the rows.
	        for index in range(start, stop):
	            try:
	                item = train_dataset[index]
	                print(f"Debug - Processing item {index}")
	                print(f"Debug - Item type: {type(item)}")
	                print(f"Debug - Item content: {item}")

	                # Get the image URL
	                image_url = item["image"]
	                if not isinstance(image_url, str) or not image_url.startswith("http"):
	                    print(f"⚠️ Skipping item {index} - Invalid URL format")
	                    continue

	                # Download the image and make sure it is in RGB mode
	                response = requests.get(image_url, timeout=10)
	                response.raise_for_status()
	                img = Image.open(BytesIO(response.content)).convert("RGB")

	                img_processed = preprocess_image(img)
	                embedding = DeepFace.represent(
	                    img_path=img_processed,
	                    model_name="Facenet",
	                    enforce_detection=False
	                )[0]["embedding"]

	                database.append((f"image_{index}", img, embedding))
	                print(f"✅ Procesada imagen {index+1}/{total}")

	                del img_processed
	                gc.collect()

	            except Exception as e:
	                # Deliberate best-effort boundary: one bad row (network,
	                # decoding, model error) must not abort the whole build.
	                print(f"❌ No se pudo procesar imagen {index}: {str(e)}")
	                print(f"Error details: {type(e).__name__}")
	                print(traceback.format_exc())

	        # 💾 Save after each batch
	        if database:
	            print("💾 Guardando progreso...")
	            with open(checkpoint_tmp, 'wb') as f:
	                pickle.dump(database, f)
	            checkpoint_tmp.replace(EMBEDDINGS_FILE)

	        gc.collect()

	    return database

	# 🔍 Find similar faces
	def find_similar_faces(uploaded_image: Image.Image):
	    """Return the five database faces most similar to ``uploaded_image``.

	    The upload goes through ``preprocess_image`` and DeepFace's ``"Facenet"``
	    model. Its embedding is compared with every entry of the module-level
	    ``database`` using the Euclidean distance, mapped to a similarity score
	    of ``1 / (1 + distance)``.

	    Args:
	        uploaded_image: Image supplied by the Gradio input component.

	    Returns:
	        tuple: ``(gallery_items, text_summary)``. ``gallery_items`` is a list
	        of ``(image, caption)`` pairs ordered best match first, and
	        ``text_summary`` holds the same captions, one per line. When the
	        query embedding cannot be computed the result is an empty list and
	        a warning message.
	    """
	    try:
	        img_processed = preprocess_image(uploaded_image)
	        query_embedding = DeepFace.represent(
	            img_path=img_processed,
	            model_name="Facenet",
	            enforce_detection=False
	        )[0]["embedding"]
	        del img_processed
	        gc.collect()
	    except Exception as e:
	        print(f"Error al procesar imagen de consulta: {str(e)}")
	        return [], "⚠ No se detectó un rostro válido en la imagen."

	    # Convert the query once instead of on every loop iteration.
	    query_vector = np.array(query_embedding)

	    similarities = []
	    for name, db_img, embedding in database:
	        dist = np.linalg.norm(query_vector - np.array(embedding))
	        similarities.append((1 / (1 + dist), name, db_img))

	    # Sort on the score alone: comparing whole tuples would fall through to
	    # the name and then to the PIL image (which has no ordering) on ties.
	    similarities.sort(key=lambda entry: entry[0], reverse=True)
	    top_matches = similarities[:5]

	    gallery_items = [
	        (img, f"{name} - Similitud: {sim:.2f}")
	        for sim, name, img in top_matches
	    ]
	    # Every caption is followed by a newline, including the last one.
	    text_summary = "".join(caption + "\n" for _, caption in gallery_items)

	    return gallery_items, text_summary

	# ⚙️ Initialise: load (or compute) the embeddings database at start-up
	print("🚀 Iniciando aplicación...")
	database = build_database()
	print(f"✅ Base de datos cargada con {len(database)} imágenes")

	# 🎛️ Gradio interface: one image in, a gallery and a text summary out
	_image_input = gr.Image(label="📤 Sube una imagen", type="pil")
	_gallery_output = gr.Gallery(label="📸 Rostros más similares")
	_summary_output = gr.Textbox(label="🧠 Similitud", lines=6)

	demo = gr.Interface(
	    fn=find_similar_faces,
	    inputs=_image_input,
	    outputs=[_gallery_output, _summary_output],
	    title="🔍 Buscador de Rostros con DeepFace",
	    description="Sube una imagen y se comparará contra los rostros del dataset alojado en Hugging Face (`Segizu/facial-recognition`).",
	)

	demo.launch()