Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -47,18 +47,20 @@ llama_parser = LlamaParse(
|
|
47 |
)
|
48 |
|
49 |
def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
|
50 |
-
|
|
|
|
|
51 |
if parser == "pypdf":
|
52 |
-
loader = PyPDFLoader(
|
53 |
return loader.load_and_split()
|
54 |
elif parser == "llamaparse":
|
55 |
try:
|
56 |
-
documents = llama_parser.load_data(
|
57 |
-
return [Document(page_content=doc.text, metadata={"source":
|
58 |
except Exception as e:
|
59 |
print(f"Error using Llama Parse: {str(e)}")
|
60 |
print("Falling back to PyPDF parser")
|
61 |
-
loader = PyPDFLoader(
|
62 |
return loader.load_and_split()
|
63 |
else:
|
64 |
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
@@ -108,6 +110,10 @@ def update_vectors(files, parser):
|
|
108 |
|
109 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
|
110 |
|
|
|
|
|
|
|
|
|
111 |
# Add this new function to handle file deletion
|
112 |
def delete_file(file_name):
|
113 |
global uploaded_documents
|
@@ -117,11 +123,12 @@ def delete_file(file_name):
|
|
117 |
uploaded_documents = [doc for doc in uploaded_documents if doc["name"] != file_name]
|
118 |
|
119 |
# Remove the file from the file system if it exists
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
123 |
else:
|
124 |
-
logging.warning(f"File not found: {
|
125 |
|
126 |
# Rebuild the FAISS database
|
127 |
rebuild_faiss_database()
|
@@ -135,7 +142,8 @@ def rebuild_faiss_database():
|
|
135 |
|
136 |
for doc in uploaded_documents:
|
137 |
try:
|
138 |
-
|
|
|
139 |
all_data.extend(data)
|
140 |
except Exception as e:
|
141 |
logging.error(f"Error processing file {doc['name']}: {str(e)}")
|
@@ -146,10 +154,9 @@ def rebuild_faiss_database():
|
|
146 |
logging.info("FAISS database rebuilt and saved")
|
147 |
else:
|
148 |
if os.path.exists("faiss_database"):
|
149 |
-
|
150 |
logging.info("No documents left, removed FAISS database")
|
151 |
|
152 |
-
|
153 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
154 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
155 |
full_response = ""
|
@@ -567,11 +574,7 @@ with demo:
|
|
567 |
# Create a new row for displaying uploaded files with delete buttons
|
568 |
with gr.Row():
|
569 |
uploaded_files = gr.CheckboxGroup(label="Uploaded Documents", interactive=True)
|
570 |
-
|
571 |
-
components=[gr.Button(value="Delete")],
|
572 |
-
label="Actions",
|
573 |
-
samples=[],
|
574 |
-
)
|
575 |
|
576 |
# Update both the output text and the document selector
|
577 |
update_button.click(
|
@@ -581,9 +584,9 @@ with demo:
|
|
581 |
)
|
582 |
|
583 |
# Handle file deletion
|
584 |
-
|
585 |
-
delete_file,
|
586 |
-
inputs=[
|
587 |
outputs=[uploaded_files]
|
588 |
)
|
589 |
|
|
|
47 |
)
|
48 |
|
49 |
def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
    """Copy an uploaded file into UPLOAD_FOLDER and parse it into documents.

    Args:
        file: Object whose ``name`` attribute is the path of the source file.
        parser: ``"pypdf"`` or ``"llamaparse"``. With ``"llamaparse"``, any
            error raised while parsing is printed and PyPDF is used instead.

    Returns:
        The parsed documents. LlamaParse results are wrapped in ``Document``
        objects whose ``source`` metadata is the path of the stored copy.

    Raises:
        ValueError: If ``parser`` is not one of the supported names.
    """
    # Reject an unknown parser before touching the file system so a bad call
    # does not leave a stray copy behind.
    if parser not in ("pypdf", "llamaparse"):
        raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")

    # Join on the base name only: os.path.join() throws away UPLOAD_FOLDER when
    # its second argument is absolute (as tempfile-generated names are), which
    # would make the destination equal to the source.
    file_path = os.path.join(UPLOAD_FOLDER, os.path.basename(file.name))

    # shutil.copy() raises SameFileError when source and destination coincide,
    # e.g. when the file already lives in UPLOAD_FOLDER.
    if os.path.abspath(file.name) != os.path.abspath(file_path):
        shutil.copy(file.name, file_path)

    if parser == "llamaparse":
        try:
            documents = llama_parser.load_data(file_path)
            return [Document(page_content=doc.text, metadata={"source": file_path}) for doc in documents]
        except Exception as e:
            # Best-effort: report the failure and fall through to PyPDF.
            print(f"Error using Llama Parse: {str(e)}")
            print("Falling back to PyPDF parser")

    # Reached for parser == "pypdf" and as the LlamaParse fallback.
    loader = PyPDFLoader(file_path)
    return loader.load_and_split()
|
|
|
110 |
|
111 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
|
112 |
|
113 |
+
# Directory that load_document copies uploads into and delete_file removes
# them from. exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test.
UPLOAD_FOLDER = "uploaded_files"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
116 |
+
|
117 |
# Delete an uploaded file and rebuild the FAISS database afterwards
|
118 |
def delete_file(file_name):
|
119 |
global uploaded_documents
|
|
|
123 |
uploaded_documents = [doc for doc in uploaded_documents if doc["name"] != file_name]
|
124 |
|
125 |
# Remove the file from the file system if it exists
|
126 |
+
file_path = os.path.join(UPLOAD_FOLDER, file_name)
|
127 |
+
if os.path.exists(file_path):
|
128 |
+
os.remove(file_path)
|
129 |
+
logging.info(f"Deleted file: {file_path}")
|
130 |
else:
|
131 |
+
logging.warning(f"File not found: {file_path}")
|
132 |
|
133 |
# Rebuild the FAISS database
|
134 |
rebuild_faiss_database()
|
|
|
142 |
|
143 |
for doc in uploaded_documents:
|
144 |
try:
|
145 |
+
file_path = os.path.join(UPLOAD_FOLDER, doc["name"])
|
146 |
+
data = load_document(NamedTemporaryFile(delete=False, suffix=".pdf", mode="wb", dir=UPLOAD_FOLDER, name=doc["name"]), "llamaparse")
|
147 |
all_data.extend(data)
|
148 |
except Exception as e:
|
149 |
logging.error(f"Error processing file {doc['name']}: {str(e)}")
|
|
|
154 |
logging.info("FAISS database rebuilt and saved")
|
155 |
else:
|
156 |
if os.path.exists("faiss_database"):
|
157 |
+
shutil.rmtree("faiss_database")
|
158 |
logging.info("No documents left, removed FAISS database")
|
159 |
|
|
|
160 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
161 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
162 |
full_response = ""
|
|
|
574 |
# Create a new row listing the uploaded files next to a "Delete Selected" button
|
575 |
with gr.Row():
|
576 |
uploaded_files = gr.CheckboxGroup(label="Uploaded Documents", interactive=True)
|
577 |
+
delete_button = gr.Button("Delete Selected")
|
|
|
|
|
|
|
|
|
578 |
|
579 |
# Update both the output text and the document selector
|
580 |
update_button.click(
|
|
|
584 |
)
|
585 |
|
586 |
# Handle file deletion
|
587 |
+
delete_button.click(
|
588 |
+
lambda selected: [delete_file(file) for file in selected],
|
589 |
+
inputs=[uploaded_files],
|
590 |
outputs=[uploaded_files]
|
591 |
)
|
592 |
|