Segizu committed
Commit 288a128 · 1 Parent(s): 8196356

metadata no cache

Files changed (1)
  1. app.py +58 -72
app.py CHANGED
@@ -10,71 +10,57 @@ import gc
 import requests
 from io import BytesIO
 
-# 📁 Local directory for embeddings
+# 📁 Folder to store each embedding
 EMBEDDINGS_DIR = Path("embeddings")
 EMBEDDINGS_DIR.mkdir(exist_ok=True)
-EMBEDDINGS_FILE = EMBEDDINGS_DIR / "embeddings.pkl"
 
-headers = {}
-HF_TOKEN = os.getenv("HF_TOKEN")
-if HF_TOKEN:
-    headers["Authorization"] = f"Bearer {HF_TOKEN}"
-
-
-
-# ✅ Load the remote dataset from Hugging Face Datasets using metadata.csv
+# Load the CSV dataset
 dataset = load_dataset(
     "csv",
     data_files="metadata.csv",
     split="train",
     column_names=["image"],
-    header=0  # 👈 make sure the first row is treated as the header
+    header=0
 )
 
 print("✅ Validación post-carga")
 print(dataset[0])
 print("Columnas:", dataset.column_names)
 
-print("✅ Primeros ítems de validación:")
-for i in range(5):
-    print(dataset[i])
-
-# 🔄 Preprocess the image for DeepFace
+# 🔄 Preprocessing for DeepFace
 def preprocess_image(img: Image.Image) -> np.ndarray:
     img_rgb = img.convert("RGB")
     img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
     return np.array(img_resized)
 
-# 📦 Build the embeddings database
-def build_database():
-    if EMBEDDINGS_FILE.exists():
-        print("📂 Cargando embeddings desde archivo...")
-        with open(EMBEDDINGS_FILE, "rb") as f:
-            return pickle.load(f)
+# 🔐 Header in case the dataset is private
+HF_TOKEN = os.getenv("HF_TOKEN")
+headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
-    print("🔄 Calculando embeddings...")
-    database = []
+# 📦 Build the database (one embedding per file)
+def build_database():
+    print("🔄 Generando embeddings...")
     batch_size = 10
 
     for i in range(0, len(dataset), batch_size):
         batch = dataset[i:i + batch_size]
-        print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
+        print(f"📦 Lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
 
         for j in range(len(batch["image"])):
-            try:
-                item = {"image": batch["image"][j]}
+            item = {"image": batch["image"][j]}
+            image_url = item["image"]
 
-                image_url = item["image"]
-                if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
-                    print(f"⚠️ Saltando item {i + j} - URL inválida: {image_url}")
-                    continue
+            # Validate
+            if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
+                print(f"⚠️ Saltando {i + j} - URL inválida: {image_url}")
+                continue
 
-                # Authentication for private datasets
-                headers = {}
-                HF_TOKEN = os.getenv("HF_TOKEN")
-                if HF_TOKEN:
-                    headers["Authorization"] = f"Bearer {HF_TOKEN}"
+            name = f"image_{i + j}"
+            emb_path = EMBEDDINGS_DIR / f"{name}.pkl"
+            if emb_path.exists():
+                continue  # Already exists
 
+            try:
                 response = requests.get(image_url, headers=headers, timeout=10)
                 response.raise_for_status()
                 img = Image.open(BytesIO(response.content)).convert("RGB")
@@ -86,25 +72,19 @@ def build_database():
                     enforce_detection=False
                 )[0]["embedding"]
 
-                database.append((f"image_{i + j}", img, embedding))
-                print(f" Procesada imagen {i + j + 1}/{len(dataset)}")
+                # Save as an individual file
+                with open(emb_path, "wb") as f:
+                    pickle.dump({"name": name, "img": img, "embedding": embedding}, f)
 
+                print(f"✅ Guardado: {name}")
                 del img_processed
                 gc.collect()
 
             except Exception as e:
-                print(f"❌ Error al procesar imagen {i + j}: {str(e)}")
+                print(f"❌ Error en {name}: {e}")
                 continue
 
-    # Save at the end if there is data
-    if database:
-        print("💾 Guardando embeddings finales...")
-        with open(EMBEDDINGS_FILE, "wb") as f:
-            pickle.dump(database, f)
-
-    return database
-
-# 🔍 Find similar faces
+# 🔍 Search for similarities
 def find_similar_faces(uploaded_image: Image.Image):
     try:
         img_processed = preprocess_image(uploaded_image)
@@ -116,42 +96,48 @@ def find_similar_faces(uploaded_image: Image.Image):
         del img_processed
         gc.collect()
     except Exception as e:
-        print(f"Error al procesar imagen de entrada: {str(e)}")
-        return [], "⚠ No se detectó un rostro válido."
+        return [], f"Error procesando imagen: {str(e)}"
 
     similarities = []
-    for name, db_img, embedding in database:
-        dist = np.linalg.norm(np.array(query_embedding) - np.array(embedding))
-        sim_score = 1 / (1 + dist)
-        similarities.append((sim_score, name, db_img))
 
-    similarities.sort(reverse=True)
-    top_matches = similarities[:5]
+    for emb_file in EMBEDDINGS_DIR.glob("*.pkl"):
+        try:
+            with open(emb_file, "rb") as f:
+                record = pickle.load(f)
 
-    gallery_items = []
-    summary = ""
-    for sim, name, img in top_matches:
-        caption = f"{name} - Similitud: {sim:.2f}"
-        gallery_items.append((np.array(img), caption))
-        summary += caption + "\n"
+            name = record["name"]
+            img = record["img"]
+            emb = record["embedding"]
+
+            dist = np.linalg.norm(np.array(query_embedding) - np.array(emb))
+            sim_score = 1 / (1 + dist)
+            similarities.append((sim_score, name, np.array(img)))
+
+        except Exception as e:
+            print(f"⚠ Error leyendo {emb_file}: {e}")
+            continue
+
+    similarities.sort(reverse=True)
+    top = similarities[:5]
 
-    return gallery_items, summary
+    gallery = [(img, f"{name} - Similitud: {sim:.2f}") for sim, name, img in top]
+    summary = "\n".join([f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top])
+    return gallery, summary
 
-# 🚀 Start the application
-print("🚀 Iniciando aplicación...")
-database = build_database()
-print(f"✅ Base cargada con {len(database)} imágenes.")
+# 🚀 Run at startup
+print("🚀 Iniciando app...")
+build_database()
 
-# 🎛️ Gradio UI
+# 🎛️ Gradio interface
 demo = gr.Interface(
     fn=find_similar_faces,
     inputs=gr.Image(label="📤 Sube una imagen", type="pil"),
     outputs=[
-        gr.Gallery(label="📸 Rostros más similares"),
-        gr.Textbox(label="🧠 Similitud", lines=6)
+        gr.Gallery(label="📸 Rostros similares"),
+        gr.Textbox(label="🧠 Detalle", lines=6)
     ],
-    title="🔍 Buscador de Rostros con DeepFace",
-    description="Sube una imagen y se comparará contra los rostros del dataset `Segizu/facial-recognition` almacenado en Hugging Face Datasets."
+    title="🔍 Reconocimiento facial con DeepFace",
+    description="Sube una imagen y encuentra coincidencias en el dataset privado de Hugging Face usando embeddings Facenet."
 )
 
 demo.launch()
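
For reference, a minimal sketch (not part of the commit) of how the per-image cache written by build_database() can be inspected offline. The embeddings/ directory and the "name"/"img"/"embedding" record keys come from the diff above; the loop and the printed summary are illustrative assumptions.

# Illustrative sketch only: walk the per-image cache produced by build_database().
# Assumes the embeddings/ layout and record keys introduced in this commit.
import pickle
from pathlib import Path

import numpy as np

EMBEDDINGS_DIR = Path("embeddings")

for emb_file in sorted(EMBEDDINGS_DIR.glob("*.pkl")):
    with open(emb_file, "rb") as f:
        record = pickle.load(f)
    emb = np.array(record["embedding"])
    # Each record holds the image name, the PIL image, and its Facenet embedding.
    print(f"{record['name']}: dim={emb.shape[0]}, L2 norm={np.linalg.norm(emb):.2f}")

Because each image gets its own .pkl file, re-running build_database() skips records that already exist (the emb_path.exists() check in the diff), so only new or previously failed images are re-embedded.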