Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -72,11 +72,7 @@ def update_vectors(files, parser):
|
|
72 |
|
73 |
if not files:
|
74 |
logging.warning("No files provided for update_vectors")
|
75 |
-
return "Please upload at least one PDF file.", gr.
|
76 |
-
choices=[doc["name"] for doc in uploaded_documents],
|
77 |
-
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
78 |
-
label="Select documents to query"
|
79 |
-
)
|
80 |
|
81 |
embed = get_embeddings()
|
82 |
total_chunks = 0
|
@@ -89,7 +85,6 @@ def update_vectors(files, parser):
|
|
89 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
90 |
all_data.extend(data)
|
91 |
total_chunks += len(data)
|
92 |
-
# Append new documents instead of replacing
|
93 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
94 |
uploaded_documents.append({"name": file.name, "selected": True})
|
95 |
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
@@ -111,11 +106,49 @@ def update_vectors(files, parser):
|
|
111 |
database.save_local("faiss_database")
|
112 |
logging.info("FAISS database saved")
|
113 |
|
114 |
-
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
121 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
@@ -531,11 +564,36 @@ with demo:
|
|
531 |
|
532 |
update_output = gr.Textbox(label="Update Status")
|
533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
# Update both the output text and the document selector
|
535 |
-
update_button.click(
|
536 |
-
|
537 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
gr.Markdown(
|
540 |
"""
|
541 |
## How to use
|
|
|
72 |
|
73 |
if not files:
|
74 |
logging.warning("No files provided for update_vectors")
|
75 |
+
return "Please upload at least one PDF file.", gr.update(choices=[], value=[])
|
|
|
|
|
|
|
|
|
76 |
|
77 |
embed = get_embeddings()
|
78 |
total_chunks = 0
|
|
|
85 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
86 |
all_data.extend(data)
|
87 |
total_chunks += len(data)
|
|
|
88 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
89 |
uploaded_documents.append({"name": file.name, "selected": True})
|
90 |
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
|
|
106 |
database.save_local("faiss_database")
|
107 |
logging.info("FAISS database saved")
|
108 |
|
109 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
|
110 |
+
|
111 |
+
# Add this new function to handle file deletion
|
112 |
+
def delete_file(file_name):
    """Delete an uploaded document and return the refreshed document list.

    The document is dropped from the module-level ``uploaded_documents``
    registry, its file is removed from disk, and the FAISS index is rebuilt
    from the documents that remain.

    Args:
        file_name: Name (path) of the document, as stored under the ``"name"``
            key of an ``uploaded_documents`` entry.

    Returns:
        A ``gr.update`` whose ``choices`` and ``value`` both list the names of
        the documents still registered.
    """
    global uploaded_documents
    logging.info(f"Attempting to delete file: {file_name}")

    is_tracked = any(doc["name"] == file_name for doc in uploaded_documents)
    if not is_tracked:
        # The name arrives from a UI event; only paths that were registered by
        # an upload may be removed, otherwise any file on disk could be deleted.
        logging.warning(f"Refusing to delete untracked file: {file_name}")
    else:
        # Remove the file from uploaded_documents
        uploaded_documents = [
            doc for doc in uploaded_documents if doc["name"] != file_name
        ]

        # Remove the file from the file system; attempting the removal directly
        # avoids the race between an existence check and the delete.
        try:
            os.remove(file_name)
        except FileNotFoundError:
            logging.warning(f"File not found: {file_name}")
        else:
            logging.info(f"Deleted file: {file_name}")

        # The index still holds chunks of the removed document, so rebuild it.
        rebuild_faiss_database()

    remaining_names = [doc["name"] for doc in uploaded_documents]
    return gr.update(value=remaining_names, choices=remaining_names)
|
130 |
+
|
131 |
+
def rebuild_faiss_database(parser="llamaparse"):
    """Rebuild the on-disk FAISS index from the registered documents.

    Every entry of the module-level ``uploaded_documents`` list is re-parsed
    and the resulting chunks are indexed into a fresh store saved under
    ``"faiss_database"``. When no chunks can be loaded, any existing store at
    that path is removed instead.

    Args:
        parser: Parser name forwarded to ``load_document``. Defaults to
            ``"llamaparse"``, the value previously hard-coded here.
    """
    import shutil  # local import: only needed for removing the saved index

    logging.info("Rebuilding FAISS database")
    all_data = []

    for doc in uploaded_documents:
        try:
            # Parse the document itself. Opening it yields a file object whose
            # ``.name`` is the document path, instead of a fresh empty temp
            # file that was never cleaned up.
            # NOTE(review): assumes load_document accepts any file-like object
            # exposing ``.name`` -- confirm against its definition.
            with open(doc["name"], "rb") as source:
                all_data.extend(load_document(source, parser))
        except Exception as e:
            # Best effort: one unreadable document must not abort the rebuild.
            logging.error(f"Error processing file {doc['name']}: {str(e)}")

    if all_data:
        # Embeddings are only needed when there is something to index.
        embed = get_embeddings()
        database = FAISS.from_documents(all_data, embed)
        database.save_local("faiss_database")
        logging.info("FAISS database rebuilt and saved")
    elif os.path.exists("faiss_database"):
        # save_local writes a folder, which os.remove cannot delete; handle
        # both a directory and a plain file at that path.
        if os.path.isdir("faiss_database"):
            shutil.rmtree("faiss_database")
        else:
            os.remove("faiss_database")
        logging.info("No documents left, removed FAISS database")
|
151 |
+
|
152 |
|
153 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
154 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
|
|
564 |
|
565 |
update_output = gr.Textbox(label="Update Status")
|
566 |
|
567 |
+
# Create a new row for displaying uploaded files with delete buttons
|
568 |
+
with gr.Row():
|
569 |
+
uploaded_files = gr.CheckboxGroup(label="Uploaded Documents", interactive=True)
|
570 |
+
delete_buttons = gr.Dataset(
|
571 |
+
components=[gr.Button(value="Delete")],
|
572 |
+
label="Actions",
|
573 |
+
samples=[],
|
574 |
+
)
|
575 |
+
|
576 |
# Update both the output text and the document selector
|
577 |
+
update_button.click(
|
578 |
+
update_vectors,
|
579 |
+
inputs=[file_input, parser_dropdown],
|
580 |
+
outputs=[update_output, uploaded_files]
|
581 |
+
)
|
582 |
+
|
583 |
+
# Handle file deletion
|
584 |
+
delete_buttons.click(
|
585 |
+
delete_file,
|
586 |
+
inputs=[delete_buttons],
|
587 |
+
outputs=[uploaded_files]
|
588 |
+
)
|
589 |
|
590 |
+
# Update the document selector in the chat interface
|
591 |
+
uploaded_files.change(
|
592 |
+
lambda x: gr.update(choices=x, value=x),
|
593 |
+
inputs=[uploaded_files],
|
594 |
+
outputs=[document_selector]
|
595 |
+
)
|
596 |
+
|
597 |
gr.Markdown(
|
598 |
"""
|
599 |
## How to use
|