WebashalarForML committed on
Commit
541ce76
·
verified ·
1 Parent(s): a41be00

Update retrival.py

Browse files
Files changed (1) hide show
  1. retrival.py +8 -3
retrival.py CHANGED
@@ -136,6 +136,7 @@ def load_document(data_path):
136
 
137
  # Loop over tables and match text from the same document and page
138
 
 
139
  for doc in processed_documents:
140
  cnt=1 # count for storing number of the table
141
  for table_metadata in doc.get("tables", {}).get("Metadata", []):
@@ -180,6 +181,7 @@ def load_document(data_path):
180
  }
181
  )
182
  )
 
183
 
184
  # Initialize a structure to group content by doc_id
185
  grouped_by_doc_id = defaultdict(lambda: {
@@ -224,6 +226,7 @@ def load_document(data_path):
224
 
225
 
226
  #Dirctory loader for loading the text data only to specific db
 
227
  loader = DirectoryLoader(data_path, glob="*.*")
228
  documents = loader.load()
229
 
@@ -234,8 +237,9 @@ def load_document(data_path):
234
  path=doc.metadata.get("source")
235
  match = re.search(r'([^\\]+\.[^\\]+)$', path)
236
  doc.metadata.update({"filename":match.group(1)})
237
-
238
- return documents,grouped_documents
 
239
  #documents,processed_documents,table_document = load_document(data_path)
240
 
241
 
@@ -395,7 +399,8 @@ def generate_data_store(file_path, db_name):
395
  print(f"Filepath ===> {file_path} DB Name ====> {db_name}")
396
 
397
  try:
398
- documents,grouped_documents = load_document(file_path)
 
399
  print("Documents loaded successfully.")
400
  except Exception as e:
401
  print(f"Error loading documents: {e}")
 
136
 
137
  # Loop over tables and match text from the same document and page
138
 
139
+ '''
140
  for doc in processed_documents:
141
  cnt=1 # count for storing number of the table
142
  for table_metadata in doc.get("tables", {}).get("Metadata", []):
 
181
  }
182
  )
183
  )
184
+ '''
185
 
186
  # Initialize a structure to group content by doc_id
187
  grouped_by_doc_id = defaultdict(lambda: {
 
226
 
227
 
228
  #Dirctory loader for loading the text data only to specific db
229
+ '''
230
  loader = DirectoryLoader(data_path, glob="*.*")
231
  documents = loader.load()
232
 
 
237
  path=doc.metadata.get("source")
238
  match = re.search(r'([^\\]+\.[^\\]+)$', path)
239
  doc.metadata.update({"filename":match.group(1)})
240
+ return documents,
241
+ '''
242
+ return grouped_documents
243
  #documents,processed_documents,table_document = load_document(data_path)
244
 
245
 
 
399
  print(f"Filepath ===> {file_path} DB Name ====> {db_name}")
400
 
401
  try:
402
+ #documents,grouped_documents = load_document(file_path)
403
+ grouped_documents = load_document(file_path)
404
  print("Documents loaded successfully.")
405
  except Exception as e:
406
  print(f"Error loading documents: {e}")