Gourisankar Padihary committed on
Commit
5b18a9a
·
1 Parent(s): afa7a1b

Apply sentence keys

Browse files
generator/document_utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
class Document:
    """Minimal container pairing a document's text with its metadata.

    Attributes:
        metadata: Arbitrary metadata supplied by the caller; stored as-is.
        page_content: The document text. Callers in this module treat it as
            a string (it is split on the "\\nPassage: " marker).
    """

    def __init__(self, metadata, page_content: str) -> None:
        self.metadata = metadata
        self.page_content = page_content

    def __repr__(self) -> str:
        # A readable repr makes logged or printed documents inspectable
        # instead of showing only an object address.
        return (
            f"{type(self).__name__}("
            f"metadata={self.metadata!r}, page_content={self.page_content!r})"
        )
7
+
8
def _sentence_key(index: int) -> str:
    """Return the lowercase letter key for a zero-based sentence index.

    Indices 0-25 map to 'a'-'z'. Larger indices continue spreadsheet-style
    ('aa', 'ab', ...) so keys stay alphabetic and unique instead of running
    into punctuation characters past 'z'.
    """
    letters = []
    remaining = index + 1
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("a") + offset))
    return "".join(reversed(letters))


def apply_sentence_keys_documents(relevant_docs: List["Document"]) -> List[List[List[str]]]:
    """Label the title and each passage sentence of every document with a key.

    Each document's ``page_content`` is split at the first "\\nPassage: "
    marker into a title and a passage; the passage is then split on ". ".
    Keys are the document's position in ``relevant_docs`` followed by a
    letter: the title gets "a" and the passage sentences get "b", "c", ...

    Args:
        relevant_docs: Objects exposing a string ``page_content`` attribute.

    Returns:
        One list per document, each holding ``[key, text]`` pairs, e.g.
        ``[[["0a", title], ["0b", sentence], ...], ...]``.

    Notes:
        * A document without the "\\nPassage: " marker yields only its title
          entry (the whole text) rather than raising ``IndexError``.
        * Text after a second "\\nPassage: " marker is kept as part of the
          passage instead of being silently dropped.
    """
    result = []
    for i, doc in enumerate(relevant_docs):
        doc_id = str(i)
        title, separator, passage_text = doc.page_content.partition("\nPassage: ")

        doc_result = [[f"{doc_id}{_sentence_key(0)}", title]]
        if separator:
            # Passage sentences start at index 1 because "a" is the title.
            for j, passage in enumerate(passage_text.split(". "), start=1):
                doc_result.append([f"{doc_id}{_sentence_key(j)}", passage])

        result.append(doc_result)

    return result


def apply_sentence_keys_response(input_string: str) -> List[List[str]]:
    """Split a response on ". " and label each sentence with a letter key.

    Args:
        input_string: The response text to split.

    Returns:
        A list of ``[key, sentence]`` pairs keyed "a", "b", ..., "z", "aa", ...
        An empty input yields a single pair with an empty sentence, matching
        the behavior of ``str.split``.
    """
    sentences = input_string.split(". ")
    return [[_sentence_key(i), sentence] for i, sentence in enumerate(sentences)]
generator/extract_attributes.py CHANGED
@@ -1,6 +1,6 @@
1
- import json
2
  from generator.create_prompt import create_prompt
3
  from generator.initialize_llm import initialize_llm
 
4
 
5
  # Initialize the LLM
6
  llm = initialize_llm()
@@ -8,10 +8,11 @@ llm = initialize_llm()
8
  # Function to extract attributes
9
  def extract_attributes(question, relevant_docs, response):
10
  # Format documents into a string by accessing the `page_content` attribute of each Document
11
- formatted_documents = "\n".join([f"Doc {i+1}: {doc.page_content}" for i, doc in enumerate(relevant_docs)])
 
 
12
 
13
- #print(f'Formated documents: {formatted_documents}')
14
- attribute_prompt = create_prompt(formatted_documents, question, response)
15
 
16
  # Instead of using BaseMessage, pass the formatted prompt directly to invoke
17
  result = llm.invoke(attribute_prompt)
 
 
1
  from generator.create_prompt import create_prompt
2
  from generator.initialize_llm import initialize_llm
3
+ from generator.document_utils import Document, apply_sentence_keys_documents, apply_sentence_keys_response
4
 
5
  # Initialize the LLM
6
  llm = initialize_llm()
 
8
  # Function to extract attributes
9
  def extract_attributes(question, relevant_docs, response):
10
  # Format documents into a string by accessing the `page_content` attribute of each Document
11
+ #formatted_documents = "\n".join([f"Doc {i+1}: {doc.page_content}" for i, doc in enumerate(relevant_docs)])
12
+ formatted_documents = apply_sentence_keys_documents(relevant_docs)
13
+ formatted_responses = apply_sentence_keys_response(response)
14
 
15
+ attribute_prompt = create_prompt(formatted_documents, question, formatted_responses)
 
16
 
17
  # Instead of using BaseMessage, pass the formatted prompt directly to invoke
18
  result = llm.invoke(attribute_prompt)
main.py CHANGED
@@ -34,8 +34,8 @@ def main():
34
  relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
35
  logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
36
  # Log each retrieved document individually
37
- for i, doc in enumerate(relevant_docs):
38
- logging.info(f"Relevant document {i+1}: {doc} \n")
39
 
40
  # Initialize the LLM
41
  llm = initialize_llm()
 
34
  relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
35
  logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
36
  # Log each retrieved document individually
37
+ #for i, doc in enumerate(relevant_docs):
38
+ #logging.info(f"Relevant document {i+1}: {doc} \n")
39
 
40
  # Initialize the LLM
41
  llm = initialize_llm()