Spaces:

gourisankar85
/

realtime-rag-pipeline

Running

Gourisankar Padihary committed on Dec 15, 2024

Commit

5b18a9a

1 Parent(s): afa7a1b

Apply sentence keys

Files changed (3) hide show

generator/document_utils.py ADDED Viewed

+from typing import List
class Document:
    """Minimal container pairing a document's text with its metadata.

    Attributes:
        metadata: Caller-supplied metadata; stored as given and never
            inspected by this class.
        page_content: The document text.
    """

    def __init__(self, metadata, page_content: str):
        self.metadata = metadata
        self.page_content = page_content

    def __repr__(self) -> str:
        # Without this, logging or printing a document shows only the default
        # "<... object at 0x...>" form, which is useless when debugging.
        return (
            f"{type(self).__name__}("
            f"metadata={self.metadata!r}, page_content={self.page_content!r})"
        )
def _sentence_label(index: int) -> str:
    """Return the lowercase alphabetic label for a zero-based index.

    Labels run ``a`` .. ``z`` and then continue ``aa``, ``ab``, ... so a label
    is always made of letters only, no matter how many sentences there are.
    """
    letters = []
    remaining = index + 1  # bijective base-26: work with a 1-based count
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("a") + offset))
    return "".join(reversed(letters))


def apply_sentence_keys_documents(
    relevant_docs: List["Document"],
) -> List[List[List[str]]]:
    """Split each document into keyed ``[key, text]`` pairs.

    Every document's ``page_content`` is read as
    ``"<title>\\nPassage: <passage text>"``. The title gets key
    ``"<doc index>a"``; the passage is split on ``". "`` and its pieces get
    ``"<doc index>b"``, ``"<doc index>c"``, and so on.

    Args:
        relevant_docs: Objects exposing a string ``page_content`` attribute.

    Returns:
        One list per document, in input order, each holding ``[key, text]``
        pairs with the title first.
    """
    passage_marker = '\nPassage: '
    result = []
    for doc_index, doc in enumerate(relevant_docs):
        # partition() splits on the first marker only: a document without a
        # marker no longer raises IndexError, and text after a repeated
        # marker is kept instead of being silently dropped.
        title, found, body = doc.page_content.partition(passage_marker)
        passages = body.split('. ') if found else []

        keyed = [[f"{doc_index}{_sentence_label(0)}", title]]
        keyed.extend(
            [f"{doc_index}{_sentence_label(position)}", passage]
            for position, passage in enumerate(passages, start=1)
        )
        result.append(keyed)
    return result


def apply_sentence_keys_response(input_string: str) -> List[List[str]]:
    """Split a response on ``". "`` and key each piece ``a``, ``b``, ...

    Args:
        input_string: The response text.

    Returns:
        ``[key, sentence]`` pairs in order of appearance. Because the split
        separator is consumed, only the last piece keeps a trailing period.
    """
    return [
        [_sentence_label(position), sentence]
        for position, sentence in enumerate(input_string.split('. '))
    ]

generator/extract_attributes.py CHANGED Viewed

@@ -1,6 +1,6 @@
-import json
 from generator.create_prompt import create_prompt
 from generator.initialize_llm import initialize_llm
 # Initialize the LLM
 llm = initialize_llm()
@@ -8,10 +8,11 @@ llm = initialize_llm()
 # Function to extract attributes
 def extract_attributes(question, relevant_docs, response):
     # Format documents into a string by accessing the `page_content` attribute of each Document
-    formatted_documents = "\n".join([f"Doc {i+1}: {doc.page_content}" for i, doc in enumerate(relevant_docs)])
-    #print(f'Formated documents: {formatted_documents}')
-    attribute_prompt = create_prompt(formatted_documents, question, response)
     # Instead of using BaseMessage, pass the formatted prompt directly to invoke
     result = llm.invoke(attribute_prompt)

 from generator.create_prompt import create_prompt
 from generator.initialize_llm import initialize_llm
+from generator.document_utils import Document, apply_sentence_keys_documents, apply_sentence_keys_response
 # Initialize the LLM
 llm = initialize_llm()
 # Function to extract attributes
 def extract_attributes(question, relevant_docs, response):
     # Format documents into a string by accessing the `page_content` attribute of each Document
+    #formatted_documents = "\n".join([f"Doc {i+1}: {doc.page_content}" for i, doc in enumerate(relevant_docs)])
+    formatted_documents = apply_sentence_keys_documents(relevant_docs)
+    formatted_responses = apply_sentence_keys_response(response)
+    attribute_prompt = create_prompt(formatted_documents, question, formatted_responses)
     # Instead of using BaseMessage, pass the formatted prompt directly to invoke
     result = llm.invoke(attribute_prompt)

main.py CHANGED Viewed

@@ -34,8 +34,8 @@ def main():
     relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
     logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
     # Log each retrieved document individually
-    for i, doc in enumerate(relevant_docs):
-        logging.info(f"Relevant document {i+1}: {doc} \n")
     # Initialize the LLM
     llm = initialize_llm()

     relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
     logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
     # Log each retrieved document individually
+    #for i, doc in enumerate(relevant_docs):
+        #logging.info(f"Relevant document {i+1}: {doc} \n")
     # Initialize the LLM
     llm = initialize_llm()