Update app.py

app.py CHANGED
@@ -47,7 +47,7 @@ llama_parser = LlamaParse(
 )
 
 def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
-    file_path = os.path.join(UPLOAD_FOLDER, file.name)
+    file_path = os.path.join(UPLOAD_FOLDER, os.path.basename(file.name))
     shutil.copy(file.name, file_path)
 
     if parser == "pypdf":
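The `file_path` change is the substantive fix in this hunk: `file.name` on a `NamedTemporaryFile` is an absolute path, and `os.path.join` discards all earlier components when a later one is absolute, so the old code resolved `file_path` to `file.name` itself and `shutil.copy` then raised `SameFileError`. A quick illustration (the paths are made up):

import os

# os.path.join drops everything before an absolute component:
print(os.path.join("uploaded_files", "/tmp/gradio/doc.pdf"))
# /tmp/gradio/doc.pdf   -- the old file_path: copying the file onto itself fails

print(os.path.join("uploaded_files", os.path.basename("/tmp/gradio/doc.pdf")))
# uploaded_files/doc.pdf   -- the new file_path lands inside UPLOAD_FOLDER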
@@ -58,8 +58,8 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
             documents = llama_parser.load_data(file_path)
             return [Document(page_content=doc.text, metadata={"source": file_path}) for doc in documents]
         except Exception as e:
-
-
+            logging.error(f"Error using Llama Parse: {str(e)}")
+            logging.info("Falling back to PyPDF parser")
             loader = PyPDFLoader(file_path)
             return loader.load_and_split()
     else:
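One caveat about the two logging calls added here: Python's root logger defaults to the WARNING level, so the `logging.info("Falling back to PyPDF parser")` message is dropped unless app.py configures logging somewhere outside this diff. If it does not, a one-time setup like this (the format string is arbitrary) makes both messages visible:

import logging

# Raise the root logger to INFO so the fallback message is not swallowed
# by the default WARNING threshold.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")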
@@ -97,18 +97,21 @@ def update_vectors(files, parser):
 
     logging.info(f"Total chunks processed: {total_chunks}")
 
-    if os.path.exists("faiss_database"):
-        logging.info("Updating existing FAISS database")
-        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-        database.add_documents(all_data)
+    if all_data:
+        if os.path.exists("faiss_database"):
+            logging.info("Updating existing FAISS database")
+            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+            database.add_documents(all_data)
+        else:
+            logging.info("Creating new FAISS database")
+            database = FAISS.from_documents(all_data, embed)
+
+        database.save_local("faiss_database")
+        logging.info("FAISS database saved")
+
+        return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
     else:
-        logging.info("Creating new FAISS database")
-        database = FAISS.from_documents(all_data, embed)
-
-    database.save_local("faiss_database")
-    logging.info("FAISS database saved")
-
-    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
+        return "No data was processed. Please check your files and try again.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
 
 UPLOAD_FOLDER = "uploaded_files"
 if not os.path.exists(UPLOAD_FOLDER):
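The heart of this hunk is the create-or-update pattern for the FAISS store, now guarded by `if all_data:` — a sensible guard, since `FAISS.from_documents` cannot infer an embedding dimension from an empty list. A self-contained sketch of the same pattern, assuming `langchain_community` and `faiss-cpu` are installed and substituting `FakeEmbeddings` for the app's `embed` object (the real imports sit outside this diff):

import os
from langchain_core.documents import Document
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import FAISS

embed = FakeEmbeddings(size=128)  # stand-in for the app's real embedding model
chunks = [Document(page_content="example chunk", metadata={"source": "demo.pdf"})]

if os.path.exists("faiss_database"):
    # allow_dangerous_deserialization opts in to unpickling the stored index;
    # acceptable only because the app reads back files it wrote itself.
    database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
    database.add_documents(chunks)
else:
    database = FAISS.from_documents(chunks, embed)

database.save_local("faiss_database")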
@@ -143,8 +146,12 @@ def rebuild_faiss_database():
     for doc in uploaded_documents:
         try:
             file_path = os.path.join(UPLOAD_FOLDER, doc["name"])
-            data = load_document(file_path, "llamaparse")
+            temp_file = NamedTemporaryFile(delete=False, suffix=".pdf", dir=UPLOAD_FOLDER)
+            temp_file.write(open(file_path, 'rb').read())
+            temp_file.close()
+            data = load_document(temp_file, "llamaparse")
             all_data.extend(data)
+            os.unlink(temp_file.name)
         except Exception as e:
             logging.error(f"Error processing file {doc['name']}: {str(e)}")
 
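One gap the new hunk leaves open: if `load_document` raises, execution jumps straight to the `except` block and `os.unlink(temp_file.name)` never runs, leaving the temp file behind in `UPLOAD_FOLDER`. A `try`/`finally` rework of the same loop body (same names and imports as app.py; only the structure is new) would guarantee cleanup:

temp_file = NamedTemporaryFile(delete=False, suffix=".pdf", dir=UPLOAD_FOLDER)
try:
    with open(file_path, "rb") as src:
        shutil.copyfileobj(src, temp_file)  # stream instead of read() to bound memory
    temp_file.close()
    data = load_document(temp_file, "llamaparse")
    all_data.extend(data)
finally:
    temp_file.close()          # harmless if already closed
    os.unlink(temp_file.name)  # runs whether or not load_document raised

This also closes the file handle that `open(file_path, 'rb').read()` leaves dangling in the committed version.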