Shreyas094 committed (verified)
Commit 1bf52e0 · 1 Parent(s): d5ec470

Update app.py

Files changed (1):
  app.py  (+22, -15)
app.py CHANGED
@@ -47,7 +47,7 @@ llama_parser = LlamaParse(
 )
 
 def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
-    file_path = os.path.join(UPLOAD_FOLDER, file.name)
+    file_path = os.path.join(UPLOAD_FOLDER, os.path.basename(file.name))
     shutil.copy(file.name, file_path)
 
     if parser == "pypdf":
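In the hunk above, the destination path is now built from os.path.basename(file.name) rather than file.name. A NamedTemporaryFile exposes an absolute path in .name, and os.path.join discards UPLOAD_FOLDER when its second argument is absolute, so the old line ended up pointing shutil.copy at the source path itself. A minimal, standalone illustration (not part of the commit):

import os
from tempfile import NamedTemporaryFile

# A temp file's .name is an absolute path; joining it onto a folder
# silently discards the folder (POSIX paths shown for illustration).
tmp = NamedTemporaryFile(suffix=".pdf", delete=False)
print(os.path.join("uploaded_files", tmp.name))
# e.g. /tmp/tmpabc123.pdf  -- "uploaded_files" is dropped entirely
print(os.path.join("uploaded_files", os.path.basename(tmp.name)))
# uploaded_files/tmpabc123.pdf  -- the copy now lands in the upload folder
os.unlink(tmp.name)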
@@ -58,8 +58,8 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
             documents = llama_parser.load_data(file_path)
             return [Document(page_content=doc.text, metadata={"source": file_path}) for doc in documents]
         except Exception as e:
-            print(f"Error using Llama Parse: {str(e)}")
-            print("Falling back to PyPDF parser")
+            logging.error(f"Error using Llama Parse: {str(e)}")
+            logging.info("Falling back to PyPDF parser")
             loader = PyPDFLoader(file_path)
             return loader.load_and_split()
     else:
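This hunk swaps the two print calls in the Llama Parse fallback for module-level logging calls, matching the logging.info/logging.error calls already used elsewhere in app.py. The INFO-level fallback message is only emitted if the root logger is configured for INFO; a setup along these lines is assumed to exist elsewhere in app.py (it is not shown in this diff):

import logging

# Without this (or an equivalent handler), the root logger defaults to WARNING,
# so logging.info("Falling back to PyPDF parser") would be silent while
# logging.error(...) would still reach stderr.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")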
@@ -97,18 +97,21 @@ def update_vectors(files, parser):
 
     logging.info(f"Total chunks processed: {total_chunks}")
 
-    if os.path.exists("faiss_database"):
-        logging.info("Updating existing FAISS database")
-        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-        database.add_documents(all_data)
+    if all_data:
+        if os.path.exists("faiss_database"):
+            logging.info("Updating existing FAISS database")
+            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+            database.add_documents(all_data)
+        else:
+            logging.info("Creating new FAISS database")
+            database = FAISS.from_documents(all_data, embed)
+
+        database.save_local("faiss_database")
+        logging.info("FAISS database saved")
+
+        return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
     else:
-        logging.info("Creating new FAISS database")
-        database = FAISS.from_documents(all_data, embed)
-
-    database.save_local("faiss_database")
-    logging.info("FAISS database saved")
-
-    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
+        return "No data was processed. Please check your files and try again.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
 
 UPLOAD_FOLDER = "uploaded_files"
 if not os.path.exists(UPLOAD_FOLDER):
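This hunk wraps the FAISS create-or-update logic in an `if all_data:` guard, so a run that produced no chunks returns a "No data was processed" message (with the same gr.update payload) instead of attempting to build or resave the index; FAISS.from_documents cannot build an index from an empty list. A self-contained sketch of the same pattern, with an assumed import path and an `embed` object standing in for whatever embedding model app.py uses:

import os
import logging
from langchain_community.vectorstores import FAISS  # import path is an assumption

def save_chunks(all_data, embed, db_path="faiss_database"):
    # Guarded create-or-update, mirroring the hunk above.
    if not all_data:
        return "No data was processed."
    if os.path.exists(db_path):
        # Incrementally extend the saved index.
        database = FAISS.load_local(db_path, embed, allow_dangerous_deserialization=True)
        database.add_documents(all_data)
    else:
        # First run: build a fresh index from the chunks.
        database = FAISS.from_documents(all_data, embed)
    database.save_local(db_path)
    return f"Saved {len(all_data)} chunks to {db_path}."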
@@ -143,8 +146,12 @@ def rebuild_faiss_database():
     for doc in uploaded_documents:
         try:
             file_path = os.path.join(UPLOAD_FOLDER, doc["name"])
-            data = load_document(NamedTemporaryFile(delete=False, suffix=".pdf", mode="wb", dir=UPLOAD_FOLDER, name=doc["name"]), "llamaparse")
+            temp_file = NamedTemporaryFile(delete=False, suffix=".pdf", dir=UPLOAD_FOLDER)
+            temp_file.write(open(file_path, 'rb').read())
+            temp_file.close()
+            data = load_document(temp_file, "llamaparse")
             all_data.extend(data)
+            os.unlink(temp_file.name)
         except Exception as e:
             logging.error(f"Error processing file {doc['name']}: {str(e)}")
 
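In this last hunk, rebuild_faiss_database() no longer passes name=doc["name"] to NamedTemporaryFile, which is not a keyword the constructor accepts and raised a TypeError; instead it writes the stored PDF's bytes into a fresh temp file, hands that file to load_document, and unlinks the copy afterwards. An alternative sketch of the same step with streamed copying and cleanup in a finally block; the names parse_with_temp_copy and parse_fn are illustrative only, and the parser passed in would be the load_document function from the first hunk:

import os
import shutil
from tempfile import NamedTemporaryFile

def parse_with_temp_copy(file_path, parse_fn, upload_folder="uploaded_files"):
    # Copy the stored PDF into a named temp file, parse it, and always
    # remove the copy, even when the parser raises.
    with NamedTemporaryFile(delete=False, suffix=".pdf", dir=upload_folder) as tmp:
        with open(file_path, "rb") as src:
            shutil.copyfileobj(src, tmp)  # stream instead of read() into memory
    try:
        return parse_fn(tmp, "llamaparse")
    finally:
        os.unlink(tmp.name)

As committed, os.unlink(temp_file.name) only runs when parsing succeeds, so a failed parse leaves the temporary copy behind in uploaded_files.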