Spaces:
Running
Running
Update retrival.py
Browse files
- retrival.py +8 -3
retrival.py
CHANGED
@@ -136,6 +136,7 @@ def load_document(data_path):
|
|
136 |
|
137 |
# Loop over tables and match text from the same document and page
|
138 |
|
|
|
139 |
for doc in processed_documents:
|
140 |
cnt=1 # count for storing number of the table
|
141 |
for table_metadata in doc.get("tables", {}).get("Metadata", []):
|
@@ -180,6 +181,7 @@ def load_document(data_path):
|
|
180 |
}
|
181 |
)
|
182 |
)
|
|
|
183 |
|
184 |
# Initialize a structure to group content by doc_id
|
185 |
grouped_by_doc_id = defaultdict(lambda: {
|
@@ -224,6 +226,7 @@ def load_document(data_path):
|
|
224 |
|
225 |
|
226 |
#Dirctory loader for loading the text data only to specific db
|
|
|
227 |
loader = DirectoryLoader(data_path, glob="*.*")
|
228 |
documents = loader.load()
|
229 |
|
@@ -234,8 +237,9 @@ def load_document(data_path):
|
|
234 |
path=doc.metadata.get("source")
|
235 |
match = re.search(r'([^\\]+\.[^\\]+)$', path)
|
236 |
doc.metadata.update({"filename":match.group(1)})
|
237 |
-
|
238 |
-
|
|
|
239 |
#documents,processed_documents,table_document = load_document(data_path)
|
240 |
|
241 |
|
@@ -395,7 +399,8 @@ def generate_data_store(file_path, db_name):
|
|
395 |
print(f"Filepath ===> {file_path} DB Name ====> {db_name}")
|
396 |
|
397 |
try:
|
398 |
-
documents,grouped_documents = load_document(file_path)
|
|
|
399 |
print("Documents loaded successfully.")
|
400 |
except Exception as e:
|
401 |
print(f"Error loading documents: {e}")
|
|
|
136 |
|
137 |
# Loop over tables and match text from the same document and page
|
138 |
|
139 |
+
'''
|
140 |
for doc in processed_documents:
|
141 |
cnt=1 # count for storing number of the table
|
142 |
for table_metadata in doc.get("tables", {}).get("Metadata", []):
|
|
|
181 |
}
|
182 |
)
|
183 |
)
|
184 |
+
'''
|
185 |
|
186 |
# Initialize a structure to group content by doc_id
|
187 |
grouped_by_doc_id = defaultdict(lambda: {
|
|
|
226 |
|
227 |
|
228 |
#Dirctory loader for loading the text data only to specific db
|
229 |
+
'''
|
230 |
loader = DirectoryLoader(data_path, glob="*.*")
|
231 |
documents = loader.load()
|
232 |
|
|
|
237 |
path=doc.metadata.get("source")
|
238 |
match = re.search(r'([^\\]+\.[^\\]+)$', path)
|
239 |
doc.metadata.update({"filename":match.group(1)})
|
240 |
+
return documents,
|
241 |
+
'''
|
242 |
+
return grouped_documents
|
243 |
#documents,processed_documents,table_document = load_document(data_path)
|
244 |
|
245 |
|
|
|
399 |
print(f"Filepath ===> {file_path} DB Name ====> {db_name}")
|
400 |
|
401 |
try:
|
402 |
+
#documents,grouped_documents = load_document(file_path)
|
403 |
+
grouped_documents = load_document(file_path)
|
404 |
print("Documents loaded successfully.")
|
405 |
except Exception as e:
|
406 |
print(f"Error loading documents: {e}")
|