Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from pdf2image import convert_from_path
|
|
7 |
from PIL import Image
|
8 |
from torch.utils.data import DataLoader
|
9 |
from tqdm import tqdm
|
|
|
10 |
|
11 |
from colpali_engine.models import ColQwen2, ColQwen2Processor
|
12 |
|
@@ -61,9 +62,16 @@ def index(files, ds):
|
|
61 |
|
62 |
def convert_files(files):
|
63 |
images = []
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
if len(images) >= 150:
|
68 |
raise gr.Error("The number of images in the dataset should be less than 150.")
|
69 |
return images
|
|
|
7 |
from PIL import Image
|
8 |
from torch.utils.data import DataLoader
|
9 |
from tqdm import tqdm
|
10 |
+
import tempfile
|
11 |
|
12 |
from colpali_engine.models import ColQwen2, ColQwen2Processor
|
13 |
|
|
|
62 |
|
63 |
def convert_files(files):
    """Render every page of the uploaded PDF files into a list of images.

    Each upload is copied into a scratch directory that is removed when the
    function returns, and the copy is rasterised with ``convert_from_path``.

    Args:
        files: Iterable of upload records. Each record is indexed with
            ``'filepath'`` (where the upload lives on disk) and ``'name'``
            (its file name). NOTE(review): this mapping shape is taken from
            how the records are indexed here - confirm against the caller.

    Returns:
        A list with the page images of all files that converted
        successfully, in upload order.

    Raises:
        gr.Error: If 150 or more page images were produced in total.
    """
    # Imported locally because only `tempfile` is visibly imported at the top
    # of the file; this keeps the function self-contained.
    import os
    import shutil

    images = []
    with tempfile.TemporaryDirectory() as temp_dir:
        for f in files:
            file_path = f['filepath']
            # Use only the final path component so a name carrying directory
            # parts cannot point outside (or into a missing folder of) the
            # scratch directory.
            temp_file_path = os.path.join(temp_dir, os.path.basename(f['name']))
            shutil.copy(file_path, temp_file_path)
            try:
                # The file imports the function itself
                # (`from pdf2image import convert_from_path`), so it is called
                # by its bare name; `pdf2image.` is not a bound name here.
                images.extend(convert_from_path(temp_file_path, thread_count=4))
            except Exception as e:
                # Best-effort: one unreadable file must not discard the pages
                # already converted, so log it and move on to the next file.
                print(f"Error converting {temp_file_path}: {e}")
                continue
    if len(images) >= 150:
        raise gr.Error("The number of images in the dataset should be less than 150.")
    return images
|