Segizu committed on
Commit
752c0fd
·
1 Parent(s): 14e3122

metadata v12

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -40,15 +40,18 @@ def build_database():
40
  database = []
41
  batch_size = 10
42
 
 
 
 
43
  # Debug: Print dataset structure
44
- print("Dataset structure:", dataset.features)
45
- print("First item structure:", dataset["train"][0])
46
- print("Dataset type:", type(dataset))
47
- print("Dataset item type:", type(dataset["train"][0]))
48
 
49
- for i in range(0, len(dataset["train"]), batch_size):
50
- batch = dataset["train"][i:i + batch_size]
51
- print(f"📦 Procesando lote {i // batch_size + 1}/{(len(dataset['train']) + batch_size - 1) // batch_size}")
52
 
53
  for j, item in enumerate(batch):
54
  try:
@@ -77,7 +80,7 @@ def build_database():
77
  )[0]["embedding"]
78
 
79
  database.append((f"image_{i+j}", img, embedding))
80
- print(f"✅ Procesada imagen {i+j+1}/{len(dataset['train'])}")
81
 
82
  del img_processed
83
  gc.collect()
 
40
  database = []
41
  batch_size = 10
42
 
43
+ # Get the train split
44
+ train_dataset = dataset["train"]
45
+
46
  # Debug: Print dataset structure
47
+ print("Dataset structure:", train_dataset.features)
48
+ print("First item structure:", train_dataset[0])
49
+ print("Dataset type:", type(train_dataset))
50
+ print("Dataset item type:", type(train_dataset[0]))
51
 
52
+ for i in range(0, len(train_dataset), batch_size):
53
+ batch = train_dataset[i:i + batch_size]
54
+ print(f"📦 Procesando lote {i // batch_size + 1}/{(len(train_dataset) + batch_size - 1) // batch_size}")
55
 
56
  for j, item in enumerate(batch):
57
  try:
 
80
  )[0]["embedding"]
81
 
82
  database.append((f"image_{i+j}", img, embedding))
83
+ print(f"✅ Procesada imagen {i+j+1}/{len(train_dataset)}")
84
 
85
  del img_processed
86
  gc.collect()