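"""Entry point for a simple RAG pipeline.

Loads a QA dataset, chunks and embeds the documents, retrieves the top-k
chunks for a sample question, generates an answer with an LLM, and then
extracts evaluation attributes and computes metrics on the answer.
"""
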
import json
import logging

from data.load_dataset import load_data
from retriever.chunk_documents import chunk_documents
from retriever.embed_documents import embed_documents
from retriever.retrieve_documents import retrieve_top_k_documents
from generator.initialize_llm import initialize_llm
from generator.generate_response import generate_response
from generator.extract_attributes import extract_attributes
from generator.compute_metrics import compute_metrics

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def main():
    logging.info("Starting the RAG pipeline")

    # Load the dataset
    dataset = load_data()
    logging.info("Dataset loaded")

    # Chunk the dataset
    documents = chunk_documents(dataset)
    logging.info("Documents chunked")

    # Embed the documents
    vector_store = embed_documents(documents)
    logging.info("Documents embedded")

    # Sample question
    sample_question = dataset[0]['question']
    logging.info(f"Sample question: {sample_question}")

    # Retrieve relevant documents
    relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
    logging.info(f"Relevant documents retrieved: {len(relevant_docs)}")

    # Log each retrieved document individually
    # for i, doc in enumerate(relevant_docs):
    #     logging.info(f"Relevant document {i + 1}: {doc}\n")

    # Initialize the LLM
    llm = initialize_llm()
    logging.info("LLM initialized")

    # Generate a response using the relevant documents
    response, source_docs = generate_response(llm, vector_store, sample_question, relevant_docs)
    logging.info("Response generated")

    # Print the response
    print(f"Response from LLM: {response}")
    # print(f"Source Documents: {source_docs}")

    # Evaluation: extract attributes from the response and source documents
    attributes = extract_attributes(sample_question, source_docs, response)
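    # Note: extract_attributes is assumed to return a chat-model-style message
    # whose .content attribute carries the evaluator LLM's raw text output.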

    # Only proceed if the content is not empty
    if attributes.content:
        result_content = attributes.content  # Access the content attribute

        # Extract the JSON part from the result_content
        json_start = result_content.find("{")
        json_end = result_content.rfind("}") + 1
        json_str = result_content[json_start:json_end]
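        # If the braces are missing, find()/rfind() return -1 and the slice
        # yields a string that json.loads rejects, landing in the except branch.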

        try:
            result_json = json.loads(json_str)
            print(json.dumps(result_json, indent=2))

            # Compute metrics using the extracted attributes
            metrics = compute_metrics(result_json)
            print(metrics)
        except json.JSONDecodeError as e:
            logging.error(f"JSONDecodeError: {e}")


if __name__ == "__main__":
    main()