# RAG pipeline entry-point script
import logging, json
from data.load_dataset import load_data
from retriever.chunk_documents import chunk_documents
from retriever.embed_documents import embed_documents
from retriever.retrieve_documents import retrieve_top_k_documents
from generator.initialize_llm import initialize_llm
from generator.generate_response import generate_response
from generator.extract_attributes import extract_attributes
from generator.compute_metrics import compute_metrics
# Set up root logging: timestamped INFO-level messages for pipeline progress.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def main():
    """Run the end-to-end RAG pipeline for a single sample question.

    Stages: load dataset -> chunk -> embed -> retrieve top-k -> generate a
    response with the LLM -> extract evaluation attributes -> parse the JSON
    they contain -> compute and print metrics.

    Returns:
        None. Progress is logged; the response and metrics are printed.
    """
    logging.info("Starting the RAG pipeline")

    # Load the dataset
    dataset = load_data()
    logging.info("Dataset loaded")

    # Chunk the dataset into retrievable passages
    documents = chunk_documents(dataset)
    logging.info("Documents chunked")

    # Embed the chunks into a vector store
    vector_store = embed_documents(documents)
    logging.info("Documents embedded")

    # Use the first record's question as a smoke-test query
    # (assumes dataset rows are mappings with a 'question' key — TODO confirm)
    sample_question = dataset[0]['question']
    logging.info("Sample question: %s", sample_question)

    # Retrieve the most relevant documents for the question
    relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
    logging.info("Relevant documents retrieved: %d", len(relevant_docs))

    # Initialize the LLM
    llm = initialize_llm()
    logging.info("LLM initialized")

    # Generate a response grounded in the retrieved documents
    response, source_docs = generate_response(llm, vector_store, sample_question, relevant_docs)
    logging.info("Response generated")
    print(f"Response from LLM: {response}")

    # Evaluation: extract attributes from the response and source documents.
    attributes = extract_attributes(sample_question, source_docs, response)
    # Guard clause: nothing to evaluate if the extractor returned no content.
    if not attributes.content:
        logging.warning("Attribute extraction returned empty content; skipping metrics")
        return
    result_content = attributes.content  # Access the content attribute

    # Locate the JSON object embedded in the model output. str.find returns
    # -1 when no brace exists, which would otherwise yield a garbage slice.
    json_start = result_content.find("{")
    json_end = result_content.rfind("}") + 1
    if json_start == -1 or json_end <= json_start:
        logging.error("No JSON object found in extracted attributes")
        return
    json_str = result_content[json_start:json_end]

    try:
        result_json = json.loads(json_str)
    except json.JSONDecodeError as e:
        logging.error("JSONDecodeError: %s", e)
        return

    print(json.dumps(result_json, indent=2))
    # Compute metrics using the extracted attributes
    metrics = compute_metrics(result_json)
    print(metrics)


if __name__ == "__main__":
    main()