Segizu committed
Commit 288a128 · 1 Parent(s): 8196356

metadata no cache

Files changed (1)
  1. app.py +58 -72
app.py CHANGED
@@ -10,71 +10,57 @@ import gc
 import requests
 from io import BytesIO
 
-# 📁 Local directory for embeddings
+# 📁 Folder to store each embedding
 EMBEDDINGS_DIR = Path("embeddings")
 EMBEDDINGS_DIR.mkdir(exist_ok=True)
-EMBEDDINGS_FILE = EMBEDDINGS_DIR / "embeddings.pkl"
 
-headers = {}
-HF_TOKEN = os.getenv("HF_TOKEN")
-if HF_TOKEN:
-    headers["Authorization"] = f"Bearer {HF_TOKEN}"
-
-
-
-# ✅ Load the remote dataset from Hugging Face Datasets using metadata.csv
+# Load the CSV dataset
 dataset = load_dataset(
     "csv",
     data_files="metadata.csv",
     split="train",
     column_names=["image"],
-    header=0  # 👈 make sure the first row is treated as the header
+    header=0
 )
 
 print("✅ Validación post-carga")
 print(dataset[0])
 print("Columnas:", dataset.column_names)
 
-print("✅ Primeros ítems de validación:")
-for i in range(5):
-    print(dataset[i])
-
-# 🔄 Preprocess the image for DeepFace
+# 🔄 Preprocessing for DeepFace
 def preprocess_image(img: Image.Image) -> np.ndarray:
     img_rgb = img.convert("RGB")
     img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
     return np.array(img_resized)
 
-# 📦 Build the embeddings database
-def build_database():
-    if EMBEDDINGS_FILE.exists():
-        print("📂 Cargando embeddings desde archivo...")
-        with open(EMBEDDINGS_FILE, "rb") as f:
-            return pickle.load(f)
+# 🔐 Header in case the dataset is private
+HF_TOKEN = os.getenv("HF_TOKEN")
+headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
-    print("🔄 Calculando embeddings...")
-    database = []
+# 📦 Build the database (one embedding per file)
+def build_database():
+    print("🔄 Generando embeddings...")
     batch_size = 10
 
     for i in range(0, len(dataset), batch_size):
         batch = dataset[i:i + batch_size]
-        print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
+        print(f"📦 Lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
 
         for j in range(len(batch["image"])):
-            try:
-                item = {"image": batch["image"][j]}
+            item = {"image": batch["image"][j]}
+            image_url = item["image"]
 
-                image_url = item["image"]
-                if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
-                    print(f"⚠️ Saltando item {i + j} - URL inválida: {image_url}")
-                    continue
+            # Validate
+            if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
+                print(f"⚠️ Saltando {i + j} - URL inválida: {image_url}")
+                continue
 
-                # Authentication for private datasets
-                headers = {}
-                HF_TOKEN = os.getenv("HF_TOKEN")
-                if HF_TOKEN:
-                    headers["Authorization"] = f"Bearer {HF_TOKEN}"
+            name = f"image_{i + j}"
+            emb_path = EMBEDDINGS_DIR / f"{name}.pkl"
+            if emb_path.exists():
+                continue  # Already exists
 
+            try:
                 response = requests.get(image_url, headers=headers, timeout=10)
                 response.raise_for_status()
                 img = Image.open(BytesIO(response.content)).convert("RGB")
@@ -86,25 +72,19 @@ def build_database():
                     enforce_detection=False
                 )[0]["embedding"]
 
-                database.append((f"image_{i + j}", img, embedding))
-                print(f" Procesada imagen {i + j + 1}/{len(dataset)}")
+                # Save as an individual file
+                with open(emb_path, "wb") as f:
+                    pickle.dump({"name": name, "img": img, "embedding": embedding}, f)
 
+                print(f"✅ Guardado: {name}")
                 del img_processed
                 gc.collect()
 
             except Exception as e:
-                print(f"❌ Error al procesar imagen {i + j}: {str(e)}")
+                print(f"❌ Error en {name}: {e}")
                 continue
 
-    # Save at the end if there is data
-    if database:
-        print("💾 Guardando embeddings finales...")
-        with open(EMBEDDINGS_FILE, "wb") as f:
-            pickle.dump(database, f)
-
-    return database
-
-# 🔍 Find similar faces
+# 🔍 Search for similarities
 def find_similar_faces(uploaded_image: Image.Image):
     try:
         img_processed = preprocess_image(uploaded_image)
@@ -116,42 +96,48 @@ def find_similar_faces(uploaded_image: Image.Image):
         del img_processed
         gc.collect()
     except Exception as e:
-        print(f"Error al procesar imagen de entrada: {str(e)}")
-        return [], "⚠ No se detectó un rostro válido."
+        return [], f"Error procesando imagen: {str(e)}"
 
     similarities = []
-    for name, db_img, embedding in database:
-        dist = np.linalg.norm(np.array(query_embedding) - np.array(embedding))
-        sim_score = 1 / (1 + dist)
-        similarities.append((sim_score, name, db_img))
 
-    similarities.sort(reverse=True)
-    top_matches = similarities[:5]
+    for emb_file in EMBEDDINGS_DIR.glob("*.pkl"):
+        try:
+            with open(emb_file, "rb") as f:
+                record = pickle.load(f)
 
-    gallery_items = []
-    summary = ""
-    for sim, name, img in top_matches:
-        caption = f"{name} - Similitud: {sim:.2f}"
-        gallery_items.append((np.array(img), caption))
-        summary += caption + "\n"
+            name = record["name"]
+            img = record["img"]
+            emb = record["embedding"]
+
+            dist = np.linalg.norm(np.array(query_embedding) - np.array(emb))
+            sim_score = 1 / (1 + dist)
+            similarities.append((sim_score, name, np.array(img)))
+
+        except Exception as e:
+            print(f"⚠ Error leyendo {emb_file}: {e}")
+            continue
+
+    similarities.sort(reverse=True)
+    top = similarities[:5]
 
-    return gallery_items, summary
+    gallery = [(img, f"{name} - Similitud: {sim:.2f}") for sim, name, img in top]
+    summary = "\n".join([f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top])
+    return gallery, summary
 
-# 🚀 Start the application
-print("🚀 Iniciando aplicación...")
-database = build_database()
-print(f"✅ Base cargada con {len(database)} imágenes.")
+# 🚀 Run at startup
+print("🚀 Iniciando app...")
+build_database()
 
-# 🎛️ Gradio UI
+# 🎛️ Gradio interface
 demo = gr.Interface(
     fn=find_similar_faces,
     inputs=gr.Image(label="📤 Sube una imagen", type="pil"),
     outputs=[
-        gr.Gallery(label="📸 Rostros más similares"),
-        gr.Textbox(label="🧠 Similitud", lines=6)
+        gr.Gallery(label="📸 Rostros similares"),
+        gr.Textbox(label="🧠 Detalle", lines=6)
     ],
-    title="🔍 Buscador de Rostros con DeepFace",
-    description="Sube una imagen y se comparará contra los rostros del dataset `Segizu/facial-recognition` almacenado en Hugging Face Datasets."
+    title="🔍 Reconocimiento facial con DeepFace",
+    description="Sube una imagen y encuentra coincidencias en el dataset privado de Hugging Face usando embeddings Facenet."
 )
 
 demo.launch()
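
For reference, a minimal sketch (not part of the commit) of how the per-image cache written by build_database() can be inspected offline. The embeddings/ directory and the "name"/"img"/"embedding" record keys come from the diff above; the loop and the printed summary are illustrative assumptions.

# Illustrative sketch only: walk the per-image cache produced by build_database().
# Assumes the embeddings/ layout and record keys introduced in this commit.
import pickle
from pathlib import Path

import numpy as np

EMBEDDINGS_DIR = Path("embeddings")

for emb_file in sorted(EMBEDDINGS_DIR.glob("*.pkl")):
    with open(emb_file, "rb") as f:
        record = pickle.load(f)
    emb = np.array(record["embedding"])
    # Each record holds the image name, the PIL image, and its Facenet embedding.
    print(f"{record['name']}: dim={emb.shape[0]}, L2 norm={np.linalg.norm(emb):.2f}")

Because each image gets its own .pkl file, re-running build_database() skips records that already exist (the emb_path.exists() check in the diff), so only new or previously failed images are re-embedded.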