Shreyas094 committed on
Commit
d5ec470
·
verified ·
1 Parent(s): fa885a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -20
app.py CHANGED
@@ -47,18 +47,20 @@ llama_parser = LlamaParse(
47
  )
48
 
49
  def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
50
- """Loads and splits the document into pages."""
 
 
51
  if parser == "pypdf":
52
- loader = PyPDFLoader(file.name)
53
  return loader.load_and_split()
54
  elif parser == "llamaparse":
55
  try:
56
- documents = llama_parser.load_data(file.name)
57
- return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
58
  except Exception as e:
59
  print(f"Error using Llama Parse: {str(e)}")
60
  print("Falling back to PyPDF parser")
61
- loader = PyPDFLoader(file.name)
62
  return loader.load_and_split()
63
  else:
64
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
@@ -108,6 +110,10 @@ def update_vectors(files, parser):
108
 
109
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
110
 
 
 
 
 
111
  # Add this new function to handle file deletion
112
  def delete_file(file_name):
113
  global uploaded_documents
@@ -117,11 +123,12 @@ def delete_file(file_name):
117
  uploaded_documents = [doc for doc in uploaded_documents if doc["name"] != file_name]
118
 
119
  # Remove the file from the file system if it exists
120
- if os.path.exists(file_name):
121
- os.remove(file_name)
122
- logging.info(f"Deleted file: {file_name}")
 
123
  else:
124
- logging.warning(f"File not found: {file_name}")
125
 
126
  # Rebuild the FAISS database
127
  rebuild_faiss_database()
@@ -135,7 +142,8 @@ def rebuild_faiss_database():
135
 
136
  for doc in uploaded_documents:
137
  try:
138
- data = load_document(NamedTemporaryFile(delete=False, suffix=".pdf", mode="wb"), "llamaparse")
 
139
  all_data.extend(data)
140
  except Exception as e:
141
  logging.error(f"Error processing file {doc['name']}: {str(e)}")
@@ -146,10 +154,9 @@ def rebuild_faiss_database():
146
  logging.info("FAISS database rebuilt and saved")
147
  else:
148
  if os.path.exists("faiss_database"):
149
- os.remove("faiss_database")
150
  logging.info("No documents left, removed FAISS database")
151
 
152
-
153
  def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
154
  print(f"Starting generate_chunked_response with {num_calls} calls")
155
  full_response = ""
@@ -567,11 +574,7 @@ with demo:
567
  # Create a new row for displaying uploaded files with delete buttons
568
  with gr.Row():
569
  uploaded_files = gr.CheckboxGroup(label="Uploaded Documents", interactive=True)
570
- delete_buttons = gr.Dataset(
571
- components=[gr.Button(value="Delete")],
572
- label="Actions",
573
- samples=[],
574
- )
575
 
576
  # Update both the output text and the document selector
577
  update_button.click(
@@ -581,9 +584,9 @@ with demo:
581
  )
582
 
583
  # Handle file deletion
584
- delete_buttons.click(
585
- delete_file,
586
- inputs=[delete_buttons],
587
  outputs=[uploaded_files]
588
  )
589
 
 
47
  )
48
 
49
  def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
50
+ file_path = os.path.join(UPLOAD_FOLDER, file.name)
51
+ shutil.copy(file.name, file_path)
52
+
53
  if parser == "pypdf":
54
+ loader = PyPDFLoader(file_path)
55
  return loader.load_and_split()
56
  elif parser == "llamaparse":
57
  try:
58
+ documents = llama_parser.load_data(file_path)
59
+ return [Document(page_content=doc.text, metadata={"source": file_path}) for doc in documents]
60
  except Exception as e:
61
  print(f"Error using Llama Parse: {str(e)}")
62
  print("Falling back to PyPDF parser")
63
+ loader = PyPDFLoader(file_path)
64
  return loader.load_and_split()
65
  else:
66
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
 
110
 
111
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.update(choices=[doc["name"] for doc in uploaded_documents], value=[doc["name"] for doc in uploaded_documents if doc["selected"]])
112
 
113
+ UPLOAD_FOLDER = "uploaded_files"
114
+ if not os.path.exists(UPLOAD_FOLDER):
115
+ os.makedirs(UPLOAD_FOLDER)
116
+
117
  # Add this new function to handle file deletion
118
  def delete_file(file_name):
119
  global uploaded_documents
 
123
  uploaded_documents = [doc for doc in uploaded_documents if doc["name"] != file_name]
124
 
125
  # Remove the file from the file system if it exists
126
+ file_path = os.path.join(UPLOAD_FOLDER, file_name)
127
+ if os.path.exists(file_path):
128
+ os.remove(file_path)
129
+ logging.info(f"Deleted file: {file_path}")
130
  else:
131
+ logging.warning(f"File not found: {file_path}")
132
 
133
  # Rebuild the FAISS database
134
  rebuild_faiss_database()
 
142
 
143
  for doc in uploaded_documents:
144
  try:
145
+ file_path = os.path.join(UPLOAD_FOLDER, doc["name"])
146
+ data = load_document(NamedTemporaryFile(delete=False, suffix=".pdf", mode="wb", dir=UPLOAD_FOLDER, name=doc["name"]), "llamaparse")
147
  all_data.extend(data)
148
  except Exception as e:
149
  logging.error(f"Error processing file {doc['name']}: {str(e)}")
 
154
  logging.info("FAISS database rebuilt and saved")
155
  else:
156
  if os.path.exists("faiss_database"):
157
+ shutil.rmtree("faiss_database")
158
  logging.info("No documents left, removed FAISS database")
159
 
 
160
  def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
161
  print(f"Starting generate_chunked_response with {num_calls} calls")
162
  full_response = ""
 
574
  # Create a new row for displaying uploaded files with delete buttons
575
  with gr.Row():
576
  uploaded_files = gr.CheckboxGroup(label="Uploaded Documents", interactive=True)
577
+ delete_button = gr.Button("Delete Selected")
 
 
 
 
578
 
579
  # Update both the output text and the document selector
580
  update_button.click(
 
584
  )
585
 
586
  # Handle file deletion
587
+ delete_button.click(
588
+ lambda selected: [delete_file(file) for file in selected],
589
+ inputs=[uploaded_files],
590
  outputs=[uploaded_files]
591
  )
592