Segizu committed on
Commit
8196356
·
1 Parent(s): 37efbf7

metadata v12

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -60,17 +60,21 @@ def build_database():
60
  batch = dataset[i:i + batch_size]
61
  print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
62
 
63
- for j, item in enumerate(batch):
64
  try:
65
- if not isinstance(item, dict) or "image" not in item:
66
- print(f"⚠️ Saltando item {i + j} - estructura inválida: {item}")
67
- continue
68
 
69
  image_url = item["image"]
70
  if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
71
  print(f"⚠️ Saltando item {i + j} - URL inválida: {image_url}")
72
  continue
73
 
 
 
 
 
 
 
74
  response = requests.get(image_url, headers=headers, timeout=10)
75
  response.raise_for_status()
76
  img = Image.open(BytesIO(response.content)).convert("RGB")
 
60
  batch = dataset[i:i + batch_size]
61
  print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
62
 
63
+ for j in range(len(batch["image"])):
64
  try:
65
+ item = {"image": batch["image"][j]}
 
 
66
 
67
  image_url = item["image"]
68
  if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
69
  print(f"⚠️ Saltando item {i + j} - URL inválida: {image_url}")
70
  continue
71
 
72
+ # Autenticación para datasets privados
73
+ headers = {}
74
+ HF_TOKEN = os.getenv("HF_TOKEN")
75
+ if HF_TOKEN:
76
+ headers["Authorization"] = f"Bearer {HF_TOKEN}"
77
+
78
  response = requests.get(image_url, headers=headers, timeout=10)
79
  response.raise_for_status()
80
  img = Image.open(BytesIO(response.content)).convert("RGB")