Shreyas094 committed on
Commit
c702374
·
verified ·
1 Parent(s): 9ddac30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -14
app.py CHANGED
@@ -104,6 +104,11 @@ def update_vectors(files, parser):
104
  logging.warning(f"No chunks loaded from {file.name}")
105
  continue
106
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
 
 
 
 
 
107
  all_data.extend(data)
108
  total_chunks += len(data)
109
  if not any(doc["name"] == file.name for doc in uploaded_documents):
@@ -460,25 +465,34 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
460
  yield "No documents available. Please upload PDF documents to answer questions."
461
  return
462
 
463
- retriever = database.as_retriever()
464
- logging.info(f"Retrieving relevant documents for query: {query}")
465
- relevant_docs = retriever.get_relevant_documents(query)
466
- logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
467
-
468
- # Filter relevant_docs based on selected documents
469
- filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
470
- logging.info(f"Number of filtered documents: {len(filtered_docs)}")
471
-
472
- if not filtered_docs:
473
- logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
474
- yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
 
 
475
  return
476
 
477
- for doc in filtered_docs:
 
 
 
 
 
 
 
478
  logging.info(f"Document source: {doc.metadata['source']}")
479
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
480
 
481
- context_str = "\n".join([doc.page_content for doc in filtered_docs])
482
  logging.info(f"Total context length: {len(context_str)}")
483
 
484
  if model == "@cf/meta/llama-3.1-8b-instruct":
 
104
  logging.warning(f"No chunks loaded from {file.name}")
105
  continue
106
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
107
+
108
+ # Add source to metadata
109
+ for chunk in data:
110
+ chunk.metadata["source"] = file.name
111
+
112
  all_data.extend(data)
113
  total_chunks += len(data)
114
  if not any(doc["name"] == file.name for doc in uploaded_documents):
 
465
  yield "No documents available. Please upload PDF documents to answer questions."
466
  return
467
 
468
+ # New function to get documents by source
469
+ def get_documents_by_source(database, sources):
470
+ return [doc for doc in database.docstore._dict.values() if doc.metadata["source"] in sources]
471
+
472
+ # Log all documents in the database
473
+ logging.info(f"All documents in database: {[doc.metadata['source'] for doc in database.docstore._dict.values()]}")
474
+
475
+ # Get only the selected documents
476
+ selected_docs_content = get_documents_by_source(database, selected_docs)
477
+ logging.info(f"Number of selected documents: {len(selected_docs_content)}")
478
+
479
+ if not selected_docs_content:
480
+ logging.warning(f"No documents found for the selected sources: {selected_docs}")
481
+ yield "No documents found for the selected sources. Please check your document selection."
482
  return
483
 
484
+ # Perform similarity search on selected documents
485
+ relevant_docs = sorted(selected_docs_content,
486
+ key=lambda doc: database.similarity_search_with_score(query, k=1, filter={"source": doc.metadata["source"]})[0][1],
487
+ reverse=True)
488
+
489
+ logging.info(f"Number of relevant documents: {len(relevant_docs)}")
490
+
491
+ for doc in relevant_docs:
492
  logging.info(f"Document source: {doc.metadata['source']}")
493
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
494
 
495
+ context_str = "\n".join([doc.page_content for doc in relevant_docs])
496
  logging.info(f"Total context length: {len(context_str)}")
497
 
498
  if model == "@cf/meta/llama-3.1-8b-instruct":