hantech committed on
Commit
5ad6fc2
·
verified ·
1 Parent(s): b11b5f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -7,6 +7,7 @@ from pdf2image import convert_from_path
7
  from PIL import Image
8
  from torch.utils.data import DataLoader
9
  from tqdm import tqdm
 
10
 
11
  from colpali_engine.models import ColQwen2, ColQwen2Processor
12
 
@@ -61,9 +62,16 @@ def index(files, ds):
61
 
62
  def convert_files(files):
63
  images = []
64
- for f in files:
65
- images.extend(convert_from_path(f, thread_count=4))
66
-
 
 
 
 
 
 
 
67
  if len(images) >= 150:
68
  raise gr.Error("The number of images in the dataset should be less than 150.")
69
  return images
 
7
  from PIL import Image
8
  from torch.utils.data import DataLoader
9
  from tqdm import tqdm
10
+ import tempfile
11
 
12
  from colpali_engine.models import ColQwen2, ColQwen2Processor
13
 
 
62
 
63
  def convert_files(files):
64
  images = []
65
+ with tempfile.TemporaryDirectory() as temp_dir:
66
+ for f in files:
67
+ file_path = f['filepath']
68
+ temp_file_path = os.path.join(temp_dir, f['name'])
69
+ shutil.copy(file_path, temp_file_path)
70
+ try:
71
+ images.extend(pdf2image.convert_from_path(temp_file_path, thread_count=4))
72
+ except Exception as e:
73
+ print(f"Error converting {temp_file_path}: {e}")
74
+ # Handle the error, maybe skip the file or raise a Gradio error
75
  if len(images) >= 150:
76
  raise gr.Error("The number of images in the dataset should be less than 150.")
77
  return images