"""Facial-similarity demo.

Builds a local database of Facenet embeddings (one pickle file per image,
downloaded from URLs listed in ``metadata.csv``) and serves a Gradio UI
that returns the five most similar stored faces for an uploaded photo.
"""

import gc
import os
import pickle
from io import BytesIO
from pathlib import Path

import gradio as gr
import numpy as np
import requests
from datasets import load_dataset
from deepface import DeepFace
from PIL import Image

# 📁 One embedding file per image is stored here.
EMBEDDINGS_DIR = Path("embeddings")
EMBEDDINGS_DIR.mkdir(exist_ok=True)

# ✅ CSV dataset: a single "image" column holding image URLs.
dataset = load_dataset(
    "csv",
    data_files="metadata.csv",
    split="train",
    column_names=["image"],
    header=0,
)

print("✅ Validación post-carga")
print(dataset[0])
print("Columnas:", dataset.column_names)


def preprocess_image(img: Image.Image) -> np.ndarray:
    """Return *img* as an RGB 160x160 ndarray (Facenet's expected input size)."""
    img_rgb = img.convert("RGB")
    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
    return np.array(img_resized)


# 🔐 Authorization header in case the image host / dataset is private.
HF_TOKEN = os.getenv("HF_TOKEN")
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}


def build_database() -> None:
    """Download every dataset image and persist its Facenet embedding.

    Each record is pickled to ``embeddings/image_<idx>.pkl`` as
    ``{"name", "img", "embedding"}``; existing files are skipped, so the
    build is resumable. Failures on individual images are logged and
    skipped rather than aborting the whole build.
    """
    print("🔄 Generando embeddings...")
    batch_size = 10
    total_batches = (len(dataset) + batch_size - 1) // batch_size

    for i in range(0, len(dataset), batch_size):
        batch = dataset[i:i + batch_size]  # dict of column -> list of values
        print(f"📦 Lote {i // batch_size + 1}/{total_batches}")

        for j in range(len(batch["image"])):
            image_url = batch["image"][j]

            # Skip non-URL values (including a stray header row "image").
            if (
                not isinstance(image_url, str)
                or not image_url.startswith("http")
                or image_url.strip().lower() == "image"
            ):
                print(f"⚠️ Saltando {i + j} - URL inválida: {image_url}")
                continue

            name = f"image_{i + j}"
            emb_path = EMBEDDINGS_DIR / f"{name}.pkl"
            if emb_path.exists():
                continue  # already computed on a previous run

            try:
                response = requests.get(image_url, headers=headers, timeout=10)
                response.raise_for_status()
                img = Image.open(BytesIO(response.content)).convert("RGB")

                img_processed = preprocess_image(img)
                # enforce_detection=False: accept images where no face is found.
                embedding = DeepFace.represent(
                    img_path=img_processed,
                    model_name="Facenet",
                    enforce_detection=False,
                )[0]["embedding"]

                # NOTE: the full PIL image is pickled alongside the embedding
                # so the gallery can display it later without re-downloading.
                with open(emb_path, "wb") as f:
                    pickle.dump(
                        {"name": name, "img": img, "embedding": embedding}, f
                    )
                print(f"✅ Guardado: {name}")

                # Drop large objects promptly to keep peak memory low.
                del img_processed, img
                gc.collect()
            except Exception as e:
                print(f"❌ Error en {name}: {e}")
                continue


def find_similar_faces(uploaded_image: Image.Image):
    """Return (gallery, summary) of the 5 stored faces closest to the upload.

    Similarity is 1 / (1 + L2 distance) between Facenet embeddings, so it
    lies in (0, 1] with 1 meaning identical. On a processing error the
    gallery is empty and the summary carries the error message.
    """
    try:
        img_processed = preprocess_image(uploaded_image)
        query_embedding = DeepFace.represent(
            img_path=img_processed,
            model_name="Facenet",
            enforce_detection=False,
        )[0]["embedding"]
        del img_processed
        gc.collect()
    except Exception as e:
        return [], f"⚠ Error procesando imagen: {str(e)}"

    similarities = []
    for emb_file in EMBEDDINGS_DIR.glob("*.pkl"):
        try:
            # NOTE(review): pickle.load is only safe because these files are
            # produced locally by build_database(); never load untrusted pickles.
            with open(emb_file, "rb") as f:
                record = pickle.load(f)
            dist = np.linalg.norm(
                np.array(query_embedding) - np.array(record["embedding"])
            )
            sim_score = 1 / (1 + dist)
            similarities.append((sim_score, record["name"], np.array(record["img"])))
        except Exception as e:
            print(f"⚠ Error leyendo {emb_file}: {e}")
            continue

    # Sort on the score only: plain tuple sorting would, on a score+name tie,
    # fall through to comparing numpy image arrays and raise ValueError.
    similarities.sort(key=lambda t: t[0], reverse=True)
    top = similarities[:5]

    gallery = [(img, f"{name} - Similitud: {sim:.2f}") for sim, name, img in top]
    summary = "\n".join(f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top)
    return gallery, summary


# 🚀 Build (or resume) the embedding database at startup.
print("🚀 Iniciando app...")
build_database()

# 🎛️ Gradio interface wiring.
demo = gr.Interface(
    fn=find_similar_faces,
    inputs=gr.Image(label="📤 Sube una imagen", type="pil"),
    outputs=[
        gr.Gallery(label="📸 Rostros similares"),
        gr.Textbox(label="🧠 Detalle", lines=6),
    ],
    title="🔍 Reconocimiento facial con DeepFace",
    description="Sube una imagen y encuentra coincidencias en el dataset privado de Hugging Face usando embeddings Facenet.",
)

if __name__ == "__main__":
    demo.launch()