WebashalarForML committed on
Commit
3ee0a6f
·
verified ·
1 Parent(s): dc86108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +344 -162
app.py CHANGED
@@ -1,9 +1,9 @@
1
  from flask import Flask, render_template, request, redirect, url_for, session, flash
2
  import os
3
  from werkzeug.utils import secure_filename
4
- #from retrival import generate_data_store
5
- from retrival import generate_data_store #,add_document_to_existing_db, delete_chunks_by_source
6
  from langchain_community.vectorstores import Chroma
 
7
  from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.prompts import ChatPromptTemplate
9
  from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
@@ -48,6 +48,11 @@ os.makedirs(TABLE_DB_FOLDER, exist_ok=True)
48
  CHROMA_PATH = None
49
  TABLE_PATH = None
50
 
 
 
 
 
 
51
  PROMPT_TEMPLATE_DOC = """
52
  <s>[INST] You are a retrieval-augmented generation (RAG) assistant. Your task is to generate a response strictly based on the given context. Follow these instructions:
53
 
@@ -95,190 +100,229 @@ Response:
95
  [/INST]
96
 
97
  """
98
-
99
-
100
- #HFT = os.getenv('HF_TOKEN')
101
- #client = InferenceClient(api_key=HFT)
102
 
103
  @app.route('/', methods=['GET'])
104
  def home():
105
  return render_template('home.html')
106
 
 
 
 
 
107
  @app.route('/chat', methods=['GET', 'POST'])
108
  def chat():
 
109
 
110
- if 'history' not in session:
111
- session['history'] = []
112
- print("sessionhist1",session['history'])
113
-
114
- global CHROMA_PATH
115
- global TABLE_PATH
116
-
117
- old_db = session.get('old_db', None)
118
- print(f"Selected DB: {CHROMA_PATH}")
119
-
120
- # if old_db != None:
121
- # if CHROMA_PATH != old_db:
122
- # session['history'] = []
123
-
124
- #print("sessionhist1",session['history'])
125
-
126
- if request.method == 'POST':
127
- query_text = request.form['query_text']
128
- if CHROMA_PATH is None:
129
- flash("Please select a database first!", "error")
130
- return redirect(url_for('list_dbs'))
131
 
132
-
133
- # Load the selected Document Database
134
- embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
135
- #embedding_function = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
136
- db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
137
- # Convert the query to its embedding vector
138
- query_embedding = embedding_function.embed_query(query_text)
139
- if isinstance(query_embedding, float):
140
- query_embedding = [query_embedding]
141
- # print(f"Query embedding: {query_embedding}")
142
- # print(f"Type of query embedding: {type(query_embedding)}")
143
- # print(f"Length of query embedding: {len(query_embedding) if isinstance(query_embedding, (list, np.ndarray)) else 'Not applicable'}")
144
- results_document = db.similarity_search_by_vector_with_relevance_scores(
145
- embedding=query_embedding, # Pass the query embedding
146
- k=3,
147
- #filter=filter_condition # Pass the filter condition
148
- )
149
-
150
- print("results------------------->",results_document)
151
- print("============================================")
152
- print("============================================")
153
-
154
- context_text_document = " \n\n###\n\n ".join(
155
- [f"Source: {doc.metadata.get('source', '')} Page_content:{doc.page_content}\n" for doc, _score in results_document]
156
- )
157
-
158
- # Loading Table Database only if available
159
- if TABLE_PATH is not None:
160
- #embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
161
- embedding_function = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
162
- tdb = Chroma(persist_directory=TABLE_PATH, embedding_function=embedding_function)
163
- results_table = tdb.similarity_search_by_vector_with_relevance_scores(
164
  embedding=query_embedding, # Pass the query embedding
165
- k=2
166
  #filter=filter_condition # Pass the filter condition
167
  )
168
- print("results------------------->",results_table)
169
- context_text_table = "\n\n---\n\n".join([doc.page_content for doc, _score in results_table])
170
-
171
- # Prepare the prompt and query the model
172
- prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE_TAB)
173
- prompt = prompt_template.format(context=context_text_document,table=context_text_table,question=query_text)
174
- #prompt = prompt_template.format(context=context_text_document,table=context_text_table, question=query_text)
175
- print("results------------------->",prompt)
176
- else:
177
- # Prepare the prompt and query the model
178
- prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE_DOC)
179
- prompt = prompt_template.format(context=context_text_document,question=query_text)
180
- #prompt = prompt_template.format(context=context_text_document,table=context_text_table, question=query_text)
181
- print("results------------------->",prompt)
182
-
183
- #Model Defining and its use
184
- repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
185
- HFT = os.environ["HF_TOKEN"]
186
- llm = HuggingFaceEndpoint(
187
- repo_id=repo_id,
188
- #max_tokens=3000,
189
- max_new_tokens=2000,
190
- task = "text-generation",
191
- temperature=0.8,
192
- huggingfacehub_api_token=HFT,
193
- )
194
- data= llm.invoke(prompt)
195
- #data = response.choices[0].message.content
196
-
197
- # filtering the uneccessary context.
198
- if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
199
- data = "We do not have information related to your query on our end."
200
-
201
- # Save the query and answer to the session history
202
- session['history'].append((query_text, data))
203
-
204
- # Mark the session as modified to ensure it gets saved
205
- session.modified = True
206
- print("sessionhist2",session['history'])
207
 
208
- return render_template('chat.html', query_text=query_text, answer=data, history=session['history'],old_db=CHROMA_PATH)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  return render_template('chat.html', history=session['history'], old_db=CHROMA_PATH)
211
 
 
 
 
 
212
  @app.route('/create-db', methods=['GET', 'POST'])
213
  def create_db():
214
- if request.method == 'POST':
215
- db_name = request.form.get('db_name', '').strip()
216
- if not db_name:
217
- return "Database name is required", 400
218
-
219
- # Get uploaded files
220
- files = request.files.getlist('folder') # Folder uploads (multiple files)
221
- single_files = request.files.getlist('file') # Single file uploads
222
-
223
- print("==================folder==>", files)
224
- print("==================single_files==>", single_files)
225
-
226
- # Ensure at least one valid file is uploaded
227
- if not any(file.filename.strip() for file in files) and not any(file.filename.strip() for file in single_files):
228
- return "No files uploaded", 400
229
-
230
- # Create upload directory
231
- upload_base_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(db_name))
232
- print(f"Base Upload Path: {upload_base_path}")
233
- os.makedirs(upload_base_path, exist_ok=True)
234
-
235
- # Process single file uploads first (if any exist)
236
- if any(file.filename.strip() for file in single_files):
237
- for file in single_files:
238
- if file.filename.strip(): # Ensure the file is valid
239
- file_name = secure_filename(file.filename)
240
- file_path = os.path.join(upload_base_path, file_name)
241
- print(f"Saving single file to: {file_path}")
242
- file.save(file_path)
243
-
244
- # If single file is uploaded, skip folder processing
245
- print("Single file uploaded, skipping folder processing.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  asyncio.run(generate_data_store(upload_base_path, db_name))
 
 
 
 
247
  return redirect(url_for('list_dbs'))
248
-
249
- # Process folder files only if valid files exist
250
- if any(file.filename.strip() for file in files):
251
- for file in files:
252
- if file.filename.strip(): # Ensure it's a valid file
253
- file_name = secure_filename(file.filename)
254
- file_path = os.path.join(upload_base_path, file_name)
255
- print(f"Saving folder file to: {file_path}")
256
- file.save(file_path)
257
-
258
- # Generate datastore
259
- asyncio.run(generate_data_store(upload_base_path, db_name))
260
- return redirect(url_for('list_dbs'))
261
 
262
  return render_template('create_db.html')
263
 
 
 
 
264
 
265
  @app.route('/list-dbs', methods=['GET'])
266
  def list_dbs():
267
  vector_dbs = [name for name in os.listdir(VECTOR_DB_FOLDER) if os.path.isdir(os.path.join(VECTOR_DB_FOLDER, name))]
 
 
 
268
  return render_template('list_dbs.html', vector_dbs=vector_dbs)
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  @app.route('/select-db/<db_name>', methods=['POST'])
271
  def select_db(db_name):
272
- flash(f"{db_name} Database has been selected", "table_selected")
273
  #Selecting the Documnet Vector DB
274
  global CHROMA_PATH
275
  global TABLE_PATH
276
  print(f"Selected DB: {CHROMA_PATH}")
277
- print("-----------------------------------------------------1----")
278
  CHROMA_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
279
  CHROMA_PATH = CHROMA_PATH.replace("\\", "/")
280
  print(f"Selected DB: {CHROMA_PATH}")
281
- print("-----------------------------------------------------2----")
282
 
283
  # Selecting the Table Vector DB
284
  table_db_path = os.path.join(TABLE_DB_FOLDER, db_name)
@@ -288,26 +332,164 @@ def select_db(db_name):
288
 
289
  return redirect(url_for('chat'))
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  @app.route('/update-dbs/<db_name>', methods=['GET','POST'])
292
  def update_db(db_name):
293
- if request.method == 'POST':
294
- db_name = request.form['db_name']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
- # Get all files from the uploaded folder
297
- files = request.files.getlist('folder')
298
- if not files:
299
- return "No files uploaded", 400
300
- print(f"Selected DB: {db_name}")
301
- DB_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
302
- DB_PATH = DB_PATH.replace("\\", "/")
303
- print(f"Selected DB: {DB_PATH}")
304
 
305
- generate_data_store(DB_PATH, db_name)
 
 
306
 
307
- return redirect(url_for('list_dbs'))
308
- return render_template('update_db.html')
 
 
309
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  if __name__ == "__main__":
311
- app.run(debug=False, use_reloader=False)
312
 
313
 
 
1
  from flask import Flask, render_template, request, redirect, url_for, session, flash
2
  import os
3
  from werkzeug.utils import secure_filename
4
+ from retrival import generate_data_store,update_data_store,approximate_bpe_token_counter
 
5
  from langchain_community.vectorstores import Chroma
6
+ import chromadb
7
  from langchain.embeddings import HuggingFaceEmbeddings
8
  from langchain.prompts import ChatPromptTemplate
9
  from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
 
48
  CHROMA_PATH = None
49
  TABLE_PATH = None
50
 
51
+ ########################################################################################################################################################
52
+ ####----------------------------------------------------------------- Prompt Templates ------------------------------------------------------------####
53
+ ########################################################################################################################################################
54
+
55
+ # Prompt used when only document context is available (no table DB selected)
56
  PROMPT_TEMPLATE_DOC = """
57
  <s>[INST] You are a retrieval-augmented generation (RAG) assistant. Your task is to generate a response strictly based on the given context. Follow these instructions:
58
 
 
100
  [/INST]
101
 
102
  """
103
+ ########################################################################################################################################################
104
+ ####--------------------------------------------------------------- Flask APP ROUTES --------------------------------------------------------------####
105
+ ########################################################################################################################################################
 
106
 
107
  @app.route('/', methods=['GET'])
108
  def home():
109
  return render_template('home.html')
110
 
111
+ ########################################################################################################################################################
112
+ ####---------------------------------------------------------------- routes for chat --------------------------------------------------------------####
113
+ ########################################################################################################################################################
114
+
115
  @app.route('/chat', methods=['GET', 'POST'])
116
  def chat():
117
+ try:
118
 
119
+ if 'history' not in session:
120
+ session['history'] = []
121
+ print("sessionhist1",session['history'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ global CHROMA_PATH
124
+ global TABLE_PATH
125
+
126
+ old_db = session.get('old_db', None)
127
+ print(f"Selected DB: {CHROMA_PATH}")
128
+
129
+ # if old_db != None:
130
+ # if CHROMA_PATH != old_db:
131
+ # session['history'] = []
132
+
133
+ #print("sessionhist1",session['history'])
134
+
135
+ if request.method == 'POST':
136
+ query_text = request.form['query_text']
137
+ if CHROMA_PATH is None:
138
+ flash("Please select a database first!", "error")
139
+ return redirect(url_for('list_dbs'))
140
+
141
+
142
+ # Load the selected Document Database
143
+ embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
144
+ #embedding_function = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
145
+ db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
146
+ # Convert the query to its embedding vector
147
+ query_embedding = embedding_function.embed_query(query_text)
148
+ if isinstance(query_embedding, float):
149
+ query_embedding = [query_embedding]
150
+
151
+ results_document = db.similarity_search_by_vector_with_relevance_scores(
 
 
 
152
  embedding=query_embedding, # Pass the query embedding
153
+ k=3,
154
  #filter=filter_condition # Pass the filter condition
155
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ print("results------------------->",results_document)
158
+ print("============================================")
159
+ print("============================================")
160
+
161
+ context_text_document = " \n\n###\n\n ".join(
162
+ [f"Source: {doc.metadata.get('source', '')} Page_content:{doc.page_content}\n" for doc, _score in results_document]
163
+ )
164
+
165
+ # Loading Table Database only if available
166
+ if TABLE_PATH is not None:
167
+ #embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
168
+ embedding_function = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
169
+ tdb = Chroma(persist_directory=TABLE_PATH, embedding_function=embedding_function)
170
+ results_table = tdb.similarity_search_by_vector_with_relevance_scores(
171
+ embedding=query_embedding, # Pass the query embedding
172
+ k=2
173
+ #filter=filter_condition # Pass the filter condition
174
+ )
175
+ print("results------------------->",results_table)
176
+ context_text_table = "\n\n---\n\n".join([doc.page_content for doc, _score in results_table])
177
+
178
+ # Prepare the prompt and query the model
179
+ prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE_TAB)
180
+ prompt = prompt_template.format(context=context_text_document,table=context_text_table,question=query_text)
181
+ #prompt = prompt_template.format(context=context_text_document,table=context_text_table, question=query_text)
182
+ print("results------------------->",prompt)
183
+ else:
184
+ # Prepare the prompt and query the model
185
+ prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE_DOC)
186
+ prompt = prompt_template.format(context=context_text_document,question=query_text)
187
+ #prompt = prompt_template.format(context=context_text_document,table=context_text_table, question=query_text)
188
+ print("results------------------->",prompt)
189
+
190
+ #Model Defining and its use
191
+ repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
192
+ HFT = os.environ["HF_TOKEN"]
193
+ llm = HuggingFaceEndpoint(
194
+ repo_id=repo_id,
195
+ #max_tokens=3000,
196
+ max_new_tokens=2000,
197
+ task = "text-generation",
198
+ temperature=0.8,
199
+ huggingfacehub_api_token=HFT,
200
+ )
201
+ data= llm.invoke(prompt)
202
+ #data= llm(prompt)
203
+ #data = response.choices[0].message.content
204
+
205
+ # Replace answers that signal missing information with a fixed fallback message.
206
+ if re.search(r'\bmention\b|\bnot mention\b|\bnot mentioned\b|\bnot contain\b|\bnot include\b|\bnot provide\b|\bdoes not\b|\bnot explicitly\b|\bnot explicitly mentioned\b', data, re.IGNORECASE):
207
+ data = "We do not have information related to your query on our end."
208
+
209
+ # Save the query and answer to the session history
210
+ session['history'].append((query_text, data))
211
+
212
+ # Mark the session as modified to ensure it gets saved
213
+ session.modified = True
214
+ print("sessionhist2",session['history'])
215
+
216
+ return render_template('chat.html', query_text=query_text, answer=data,token_count=approximate_bpe_token_counter(data), history=session['history'],old_db=CHROMA_PATH)
217
+
218
+ except Exception as e:
219
+ flash(f"Error in Creating DB: {e}","error")
220
+ return redirect(url_for('list_dbs'))
221
 
222
  return render_template('chat.html', history=session['history'], old_db=CHROMA_PATH)
223
 
224
+ ########################################################################################################################################################
225
+ ####---------------------------------------------------------------- routes for create-db ---------------------------------------------------------####
226
+ ########################################################################################################################################################
227
+
228
  @app.route('/create-db', methods=['GET', 'POST'])
229
  def create_db():
230
+ try:
231
+ if request.method == 'POST':
232
+ db_name = request.form.get('db_name', '').strip()
233
+ if not db_name:
234
+ return "Database name is required", 400
235
+
236
+ # Get uploaded files
237
+ files = request.files.getlist('folder') # Folder uploads (multiple files)
238
+ single_files = request.files.getlist('file') # Single file uploads
239
+
240
+ print("==================folder==>", files)
241
+ print("==================single_files==>", single_files)
242
+
243
+ # Ensure at least one valid file is uploaded
244
+ if not any(file.filename.strip() for file in files) and not any(file.filename.strip() for file in single_files):
245
+ return "No files uploaded", 400
246
+
247
+ # Create upload directory
248
+ upload_base_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(db_name))
249
+ print(f"Base Upload Path: {upload_base_path}")
250
+ os.makedirs(upload_base_path, exist_ok=True)
251
+
252
+ # Process single file uploads first (if any exist)
253
+ if any(file.filename.strip() for file in single_files):
254
+ for file in single_files:
255
+ if file.filename.strip(): # Ensure the file is valid
256
+ file_name = secure_filename(file.filename)
257
+ file_path = os.path.join(upload_base_path, file_name)
258
+ print(f"Saving single file to: {file_path}")
259
+ file.save(file_path)
260
+
261
+ # If single file is uploaded, skip folder processing
262
+ print("Single file uploaded, skipping folder processing.")
263
+ asyncio.run(generate_data_store(upload_base_path, db_name))
264
+ return redirect(url_for('list_dbs'))
265
+
266
+ # Process folder files only if valid files exist
267
+ if any(file.filename.strip() for file in files):
268
+ for file in files:
269
+ if file.filename.strip(): # Ensure it's a valid file
270
+ file_name = secure_filename(file.filename)
271
+ file_path = os.path.join(upload_base_path, file_name)
272
+ print(f"Saving folder file to: {file_path}")
273
+ file.save(file_path)
274
+
275
+ # Generate datastore
276
+ #flash("Warning: storing data in DB may take time","warning")
277
  asyncio.run(generate_data_store(upload_base_path, db_name))
278
+ flash(f"{db_name} created sucessfully!","success")
279
+ return redirect(url_for('list_dbs'))
280
+ except Exception as e:
281
+ flash(f"Error in Creating DB: {e}","error")
282
  return redirect(url_for('list_dbs'))
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
  return render_template('create_db.html')
285
 
286
+ ########################################################################################################################################################
287
+ ####------------------------------------------------------- routes for list-dbs and documents -----------------------------------------------------####
288
+ ########################################################################################################################################################
289
 
290
  @app.route('/list-dbs', methods=['GET'])
291
  def list_dbs():
292
  vector_dbs = [name for name in os.listdir(VECTOR_DB_FOLDER) if os.path.isdir(os.path.join(VECTOR_DB_FOLDER, name))]
293
+ if vector_dbs==[]:
294
+ flash("NO available DBs! Let create new db","error")
295
+ return redirect(url_for('create_db'))
296
  return render_template('list_dbs.html', vector_dbs=vector_dbs)
297
 
298
+ # @app.route('/list-docs/<db_name>/<opts>', methods=['GET','POST'])
299
+ # def list_docs(db_name,opts):
300
+ # try:
301
+ # if opts=='delete':
302
+ # return redirect(url_for('delete_doc', db_name=db_name))
303
+ # elif opts=='update':
304
+ # return redirect(url_for('update_db', db_name=db_name))
305
+ # else:
306
+ # return "URL not found",404
307
+
308
+ # except Exception as e:
309
+ # # By this way we can know about the type of error occurring
310
+ # print("The error is: ",e)
311
+ # print(f"unexpected error to list the document in the {db_name}")
312
+ # return redirect(url_for('modify_db', db_name=db_name))
313
+
314
  @app.route('/select-db/<db_name>', methods=['POST'])
315
  def select_db(db_name):
316
+ flash(f"{db_name} Database has been selected", "success")
317
  # Selecting the Document Vector DB
318
  global CHROMA_PATH
319
  global TABLE_PATH
320
  print(f"Selected DB: {CHROMA_PATH}")
321
+ print("---------------------------------------------------------")
322
  CHROMA_PATH = os.path.join(VECTOR_DB_FOLDER, db_name)
323
  CHROMA_PATH = CHROMA_PATH.replace("\\", "/")
324
  print(f"Selected DB: {CHROMA_PATH}")
325
+ print("---------------------------------------------------------")
326
 
327
  # Selecting the Table Vector DB
328
  table_db_path = os.path.join(TABLE_DB_FOLDER, db_name)
 
332
 
333
  return redirect(url_for('chat'))
334
 
335
+ ########################################################################################################################################################
336
+ ####---------------------------------------------------------- routes for modification of dbs -----------------------------------------------------####
337
+ ########################################################################################################################################################
338
+
339
+ @app.route('/modify-dbs/<db_name>', methods=['GET','POST'])
340
+ def modify_db(db_name):
341
+ flash(f"{db_name} Database is selected","success")
342
+ print(db_name)
343
+ return render_template('modify_dbs.html', db_name=db_name)
344
+
345
+ ########################################################################################################################################################
346
+ ####--------------------------------------------------------- routes for updating existing dbs ---------------------------------------------------####
347
+ ########################################################################################################################################################
348
+
349
+
350
  @app.route('/update-dbs/<db_name>', methods=['GET','POST'])
351
  def update_db(db_name):
352
+ try:
353
+ if db_name and request.method == 'POST':
354
+ print(db_name)
355
+ #vector DB name is db_name
356
+ # Get all files from the uploaded folder
357
+ # Get uploaded files
358
+ files = request.files.getlist('folder') # Folder uploads (multiple files)
359
+ single_files = request.files.getlist('file') # Single file uploads
360
+
361
+ print("============from_update======folder==>", files)
362
+ print("============from_update======single_files==>", single_files)
363
+
364
+ # Ensure at least one valid file is uploaded
365
+ if not any(file.filename.strip() for file in files) and not any(file.filename.strip() for file in single_files):
366
+ return "No files uploaded", 400
367
+
368
+ # Create upload directory
369
+ upload_base_path = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(db_name))
370
+ print(f"Base Upload Path: {upload_base_path}")
371
+ os.makedirs(upload_base_path, exist_ok=True)
372
+
373
+ # Process single file uploads first (if any exist)
374
+ if any(file.filename.strip() for file in single_files):
375
+ for file in single_files:
376
+ if file.filename.strip(): # Ensure the file is valid
377
+ file_name = secure_filename(file.filename)
378
+ file_path = os.path.join(upload_base_path, file_name)
379
+ print(f"Saving single file to: {file_path}")
380
+ file.save(file_path)
381
+
382
+ # If single file is uploaded, skip folder processing
383
+ print("Single file uploaded, skipping folder processing.")
384
+ flash(f"{db_name} updated successfully!","success")
385
+ asyncio.run(update_data_store(upload_base_path, db_name))
386
+ return redirect(url_for('modify_db', db_name=db_name))
387
+
388
+ # Process folder files only if valid files exist
389
+ if any(file.filename.strip() for file in files):
390
+ for file in files:
391
+ if file.filename.strip(): # Ensure it's a valid file
392
+ file_name = secure_filename(file.filename)
393
+ file_path = os.path.join(upload_base_path, file_name)
394
+ print(f"Saving folder file to: {file_path}")
395
+ file.save(file_path)
396
+
397
+ # Generate datastore
398
+ asyncio.run(update_data_store(upload_base_path, db_name))
399
+ flash(f"{db_name} updated successfully!","success")
400
+ return redirect(url_for('modify_db', db_name=db_name))
401
+ except Exception as e:
402
+ print("No Database selected for updating")
403
+ print(f"got unexpected error {e}")
404
+ flash("got unexpected error while updating","error")
405
+ return render_template('update_db.html',db_name=db_name)
406
+
407
+ ########################################################################################################################################################
408
+ ####--------------------------------------------------------- routes for removing dbs -------------------------------------------------------------####
409
+ ########################################################################################################################################################
410
+
411
+ @app.route('/remove-dbs/<db_name>', methods=['GET','POST'])
412
+ def remove_db(db_name):
413
+ if db_name:
414
+ print(db_name)
415
+ CHROMA_PATH = f"./VectorDB/{db_name}"
416
+ TABLE_PATH = f"./TableDB/{db_name}"
417
+ try:
418
+ if os.path.exists(CHROMA_PATH):
419
+ shutil.rmtree(CHROMA_PATH)
420
+ if os.path.exists(TABLE_PATH):
421
+ shutil.rmtree(TABLE_PATH)
422
+ flash(f"{db_name} Database Removed successfully","success")
423
+ return redirect(url_for('list_dbs'))
424
+ except Exception as e:
425
+ print(f"Error in getting table: {e}")
426
+ flash(f"Error in getting table: {e}","error")
427
+ return redirect(url_for('list_dbs'))
428
+
429
+ ########################################################################################################################################################
430
+ ####--------------------------------------------------------- routes for removing specific documents ----------------------------------------------####
431
+ ########################################################################################################################################################
432
+
433
+ @app.route('/delete-doc/<db_name>', methods=['GET', 'POST'])
434
+ def delete_doc(db_name):
435
+ try:
436
+ DB_PATH = f"./VectorDB/{db_name}"
437
+ TAB_PATH = f"./TableDB/{db_name}"
438
+
439
+ client = chromadb.PersistentClient(path=DB_PATH)
440
+
441
+ # Select your collection
442
+ collection = client.get_collection("langchain")
443
+
444
+ # Fetch all documents (including metadata)
445
+ results = collection.get(include=["metadatas"])
446
+
447
+ # Extract unique file names from metadata
448
+ file_list = set(item["filename"] for item in results["metadatas"] if "filename" in item)
449
+ print("file_list", file_list)
450
 
451
+ if request.method == 'POST':
452
+ list_doc = request.form.get('list_doc')
453
+ print("list_doc", list_doc)
 
 
 
 
 
454
 
455
+ # Delete from the VectorDB collection
456
+ collection.delete(where={"filename": f"{list_doc}"})
457
+ flash(f"The document '{list_doc}' has been removed from VectorDB.", "success")
458
 
459
+ # Check if TAB_PATH exists and delete the document from TableDB if present
460
+ if os.path.exists(TAB_PATH):
461
+ client_tab = chromadb.PersistentClient(path=TAB_PATH) # Create a new client for TableDB
462
+ collect_tab = client_tab.get_collection("langchain")
463
 
464
+ # Fetch documents in TableDB
465
+ result_tab = collect_tab.get(include=["metadatas"])
466
+
467
+ # Extract unique file names from TableDB metadata
468
+ file_list_tab = set(item["filename"] for item in result_tab["metadatas"] if "filename" in item)
469
+ print("TableDB file_list:", file_list_tab)
470
+
471
+ if list_doc in file_list_tab:
472
+ collect_tab.delete(where={"filename": f"{list_doc}"}) # Delete the document from TableDB
473
+ flash(f"The document '{list_doc}' has also been removed from TableDB.", "success")
474
+ else:
475
+ flash(f"The document '{list_doc}' was not found in TableDB.", "warning")
476
+ else:
477
+ print("Note: TableDB does not exist.")
478
+ flash(f"TableDB path '{TAB_PATH}' does not exist.", "warning")
479
+
480
+ return redirect(url_for('modify_db', db_name=db_name))
481
+
482
+ return render_template('delete_doc.html', db_name=db_name, file_list=file_list)
483
+
484
+ except Exception as e:
485
+ flash(f"Error while deleting documents: {e}", "error")
486
+ return redirect(url_for('modify_db', db_name=db_name))
487
+
488
+ ########################################################################################################################################################
489
+ ####---------------------------------------------------------------------- App MAIN ---------------------------------------------------------------####
490
+ ########################################################################################################################################################
491
+
492
  if __name__ == "__main__":
493
+ app.run(debug=False, use_reloader=False)
494
 
495