Segizu committed on
Commit
e30a3b3
·
1 Parent(s): 510e6ef

metadata v8

Browse files
Files changed (1) hide show
  1. app.py +30 -8
app.py CHANGED
@@ -41,6 +41,10 @@ def build_database():
41
  database = []
42
  batch_size = 10
43
 
 
 
 
 
44
  for i in range(0, len(dataset), batch_size):
45
  batch = dataset[i:i + batch_size]
46
  print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
@@ -48,14 +52,29 @@ def build_database():
48
  for j, item in enumerate(batch):
49
  try:
50
  img_data = item["image"]
51
-
52
- # Convertir a PIL Image si es necesario
53
- if isinstance(img_data, dict) and "bytes" in img_data:
54
- img = Image.open(BytesIO(img_data["bytes"]))
55
- elif isinstance(img_data, str) and img_data.startswith("http"):
56
- response = requests.get(img_data, timeout=10)
57
- response.raise_for_status()
58
- img = Image.open(BytesIO(response.content))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  elif isinstance(img_data, Image.Image):
60
  img = img_data
61
  else:
@@ -81,6 +100,9 @@ def build_database():
81
  continue
82
  except Exception as e:
83
  print(f"❌ No se pudo procesar imagen {i+j}: {str(e)}")
 
 
 
84
  continue
85
 
86
  # 💾 Guardar después de cada batch
 
41
  database = []
42
  batch_size = 10
43
 
44
+ # Debug: Print dataset structure
45
+ print("Dataset structure:", dataset.features)
46
+ print("First item structure:", dataset[0])
47
+
48
  for i in range(0, len(dataset), batch_size):
49
  batch = dataset[i:i + batch_size]
50
  print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
 
52
  for j, item in enumerate(batch):
53
  try:
54
  img_data = item["image"]
55
+ print(f"Debug - Image data type: {type(img_data)}")
56
+ print(f"Debug - Image data content: {img_data}")
57
+
58
+ # Handle different image data formats
59
+ if hasattr(img_data, 'numpy'):
60
+ # If it's a tensor/array, convert to PIL
61
+ img = Image.fromarray(img_data.numpy())
62
+ elif isinstance(img_data, dict):
63
+ # If it's a dictionary, try to get the image data
64
+ if "bytes" in img_data:
65
+ img = Image.open(BytesIO(img_data["bytes"]))
66
+ elif "path" in img_data:
67
+ img = Image.open(img_data["path"])
68
+ else:
69
+ print(f"❌ Formato de diccionario no soportado: {img_data.keys()}")
70
+ continue
71
+ elif isinstance(img_data, str):
72
+ if img_data.startswith("http"):
73
+ response = requests.get(img_data, timeout=10)
74
+ response.raise_for_status()
75
+ img = Image.open(BytesIO(response.content))
76
+ else:
77
+ img = Image.open(img_data)
78
  elif isinstance(img_data, Image.Image):
79
  img = img_data
80
  else:
 
100
  continue
101
  except Exception as e:
102
  print(f"❌ No se pudo procesar imagen {i+j}: {str(e)}")
103
+ print(f"Error details: {type(e).__name__}")
104
+ import traceback
105
+ print(traceback.format_exc())
106
  continue
107
 
108
  # 💾 Guardar después de cada batch