Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -104,6 +104,11 @@ def update_vectors(files, parser):
|
|
104 |
logging.warning(f"No chunks loaded from {file.name}")
|
105 |
continue
|
106 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
|
|
|
|
|
|
|
|
|
|
107 |
all_data.extend(data)
|
108 |
total_chunks += len(data)
|
109 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
@@ -460,25 +465,34 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
460 |
yield "No documents available. Please upload PDF documents to answer questions."
|
461 |
return
|
462 |
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
|
|
|
|
475 |
return
|
476 |
|
477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
478 |
logging.info(f"Document source: {doc.metadata['source']}")
|
479 |
logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
|
480 |
|
481 |
-
context_str = "\n".join([doc.page_content for doc in
|
482 |
logging.info(f"Total context length: {len(context_str)}")
|
483 |
|
484 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|
|
|
104 |
logging.warning(f"No chunks loaded from {file.name}")
|
105 |
continue
|
106 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
107 |
+
|
108 |
+
# Add source to metadata
|
109 |
+
for chunk in data:
|
110 |
+
chunk.metadata["source"] = file.name
|
111 |
+
|
112 |
all_data.extend(data)
|
113 |
total_chunks += len(data)
|
114 |
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
|
|
465 |
yield "No documents available. Please upload PDF documents to answer questions."
|
466 |
return
|
467 |
|
468 |
+
# New function to get documents by source
|
469 |
+
def get_documents_by_source(database, sources):
    """Return the stored documents whose ``source`` metadata is in *sources*.

    Args:
        database: Vector store object; its documents are read from
            ``database.docstore._dict`` (a mapping whose values are the
            stored documents).
        sources: Collection of source names to keep; membership is tested
            with ``in``.

    Returns:
        A list of the matching documents, in the mapping's iteration order.
        Documents that carry no ``"source"`` metadata key are skipped
        rather than raising ``KeyError``.
    """
    # NOTE(review): ``docstore._dict`` is a private attribute — confirm the
    # docstore in use offers no public way to enumerate its documents.
    return [
        doc
        for doc in database.docstore._dict.values()
        # Guard the lookup: a stored chunk may have no "source" metadata,
        # and one such chunk must not abort the whole query.
        if "source" in doc.metadata and doc.metadata["source"] in sources
    ]
|
471 |
+
|
472 |
+
# Log all documents in the database
|
473 |
+
logging.info(f"All documents in database: {[doc.metadata['source'] for doc in database.docstore._dict.values()]}")
|
474 |
+
|
475 |
+
# Get only the selected documents
|
476 |
+
selected_docs_content = get_documents_by_source(database, selected_docs)
|
477 |
+
logging.info(f"Number of selected documents: {len(selected_docs_content)}")
|
478 |
+
|
479 |
+
if not selected_docs_content:
|
480 |
+
logging.warning(f"No documents found for the selected sources: {selected_docs}")
|
481 |
+
yield "No documents found for the selected sources. Please check your document selection."
|
482 |
return
|
483 |
|
484 |
+
# Perform similarity search on selected documents
|
485 |
+
relevant_docs = sorted(selected_docs_content,
|
486 |
+
key=lambda doc: database.similarity_search_with_score(query, k=1, filter={"source": doc.metadata["source"]})[0][1],
|
487 |
+
reverse=True)
|
488 |
+
|
489 |
+
logging.info(f"Number of relevant documents: {len(relevant_docs)}")
|
490 |
+
|
491 |
+
for doc in relevant_docs:
|
492 |
logging.info(f"Document source: {doc.metadata['source']}")
|
493 |
logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
|
494 |
|
495 |
+
context_str = "\n".join([doc.page_content for doc in relevant_docs])
|
496 |
logging.info(f"Total context length: {len(context_str)}")
|
497 |
|
498 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|