# Hugging Face Space app — facial recognition with DeepFace (Facenet embeddings).
# NOTE: removed non-Python page chrome captured during extraction:
#   "Segizu's picture / metadata no cache / 288a128 / raw / history blame / 4.54 kB"
import numpy as np
from PIL import Image
import gradio as gr
from deepface import DeepFace
from datasets import load_dataset
import os
import pickle
from pathlib import Path
import gc
import requests
from io import BytesIO
# πŸ“ Carpeta para guardar cada embedding
EMBEDDINGS_DIR = Path("embeddings")
EMBEDDINGS_DIR.mkdir(exist_ok=True)
# βœ… Cargar dataset CSV
dataset = load_dataset(
"csv",
data_files="metadata.csv",
split="train",
column_names=["image"],
header=0
)
print("βœ… ValidaciΓ³n post-carga")
print(dataset[0])
print("Columnas:", dataset.column_names)
# Preprocessing step shared by indexing and querying.
def preprocess_image(img: Image.Image) -> np.ndarray:
    """Normalize a PIL image for DeepFace: force RGB, resize to 160x160, return ndarray."""
    rgb = img.convert("RGB")
    resized = rgb.resize((160, 160), Image.Resampling.LANCZOS)
    return np.array(resized)
# πŸ” Header si el dataset es privado
HF_TOKEN = os.getenv("HF_TOKEN")
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
# Build the embedding database: one pickle file per dataset image.
def build_database():
    """Download every image URL in the dataset and cache its Facenet embedding.

    Each embedding is written to EMBEDDINGS_DIR as ``image_<idx>.pkl`` holding
    ``{"name", "img", "embedding"}``. Existing files are skipped, so reruns are
    resumable and only fill gaps. Invalid URLs and per-image failures are
    logged and skipped rather than aborting the build.
    """
    print("πŸ”„ Generando embeddings...")
    batch_size = 10
    for start in range(0, len(dataset), batch_size):
        batch = dataset[start:start + batch_size]
        print(f"πŸ“¦ Lote {start // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
        for offset, image_url in enumerate(batch["image"]):
            idx = start + offset
            # Skip header rows and anything that is not an http(s) URL.
            if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
                print(f"⚠️ Saltando {idx} - URL invÑlida: {image_url}")
                continue
            name = f"image_{idx}"
            emb_path = EMBEDDINGS_DIR / f"{name}.pkl"
            # Already cached by a previous run.
            if emb_path.exists():
                continue
            try:
                response = requests.get(image_url, headers=headers, timeout=10)
                response.raise_for_status()
                img = Image.open(BytesIO(response.content)).convert("RGB")
                img_processed = preprocess_image(img)
                embedding = DeepFace.represent(
                    img_path=img_processed,
                    model_name="Facenet",
                    enforce_detection=False
                )[0]["embedding"]
                # Persist name + original image + embedding as one record.
                with open(emb_path, "wb") as f:
                    pickle.dump({"name": name, "img": img, "embedding": embedding}, f)
                print(f"βœ… Guardado: {name}")
                # Free the large array promptly to keep peak memory low.
                del img_processed
                gc.collect()
            except Exception as e:
                print(f"❌ Error en {name}: {e}")
# πŸ” Buscar similitudes
def find_similar_faces(uploaded_image: Image.Image):
try:
img_processed = preprocess_image(uploaded_image)
query_embedding = DeepFace.represent(
img_path=img_processed,
model_name="Facenet",
enforce_detection=False
)[0]["embedding"]
del img_processed
gc.collect()
except Exception as e:
return [], f"⚠ Error procesando imagen: {str(e)}"
similarities = []
for emb_file in EMBEDDINGS_DIR.glob("*.pkl"):
try:
with open(emb_file, "rb") as f:
record = pickle.load(f)
name = record["name"]
img = record["img"]
emb = record["embedding"]
dist = np.linalg.norm(np.array(query_embedding) - np.array(emb))
sim_score = 1 / (1 + dist)
similarities.append((sim_score, name, np.array(img)))
except Exception as e:
print(f"⚠ Error leyendo {emb_file}: {e}")
continue
similarities.sort(reverse=True)
top = similarities[:5]
gallery = [(img, f"{name} - Similitud: {sim:.2f}") for sim, name, img in top]
summary = "\n".join([f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top])
return gallery, summary
# Build (or resume) the embedding cache once at startup, before the UI launches.
print("πŸš€ Iniciando app...")
build_database()
# πŸŽ›οΈ Interfaz Gradio
demo = gr.Interface(
fn=find_similar_faces,
inputs=gr.Image(label="πŸ“€ Sube una imagen", type="pil"),
outputs=[
gr.Gallery(label="πŸ“Έ Rostros similares"),
gr.Textbox(label="🧠 Detalle", lines=6)
],
title="πŸ” Reconocimiento facial con DeepFace",
description="Sube una imagen y encuentra coincidencias en el dataset privado de Hugging Face usando embeddings Facenet."
)
demo.launch()