Spaces:
Sleeping
Sleeping
File size: 2,320 Bytes
f7c2fa3 026aeba f7c2fa3 026aeba bd69eee f7c2fa3 bd69eee 026aeba bd69eee 026aeba bd69eee 026aeba bd69eee 026aeba bd69eee 79dcf63 026aeba f7c2fa3 bd69eee 026aeba 79dcf63 bd69eee 5b18a9a 79dcf63 bd69eee 79dcf63 bd69eee 026aeba b1b2c27 79dcf63 026aeba 79dcf63 b1b2c27 bd69eee f7c2fa3 026aeba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import logging
from data.load_dataset import load_data
from generator import compute_rmse_auc_roc_metrics
from retriever.chunk_documents import chunk_documents
from retriever.embed_documents import embed_documents
from retriever.retrieve_documents import retrieve_top_k_documents
from generator.initialize_llm import initialize_llm
from generator.generate_response import generate_response
from generator.extract_attributes import extract_attributes
from generator.compute_metrics import get_metrics
# Configure logging
# Root logger at INFO; every record carries a timestamp, level, and message.
# Applies process-wide — downstream modules using logging inherit this format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def main(row_num: int = 1, top_k: int = 5) -> None:
    """Run the RAG pipeline end to end on one sample question.

    Loads the dataset, chunks and embeds the documents into a vector
    store, retrieves the top-k documents for the question at index
    ``row_num``, generates an LLM response, then extracts attributes
    and computes evaluation metrics for that response.

    Args:
        row_num: Index of the dataset row whose ``question`` field is
            used as the sample query (default 1, as before).
        top_k: Number of documents to retrieve for the query
            (default 5, as before).
    """
    logging.info("Starting the RAG pipeline")

    # Load the dataset
    dataset = load_data()
    logging.info("Dataset loaded")

    # Chunk the dataset into retrievable documents
    documents = chunk_documents(dataset)
    logging.info("Documents chunked")

    # Embed the documents into a vector store
    vector_store = embed_documents(documents)
    logging.info("Documents embedded")

    # Pick the sample question from the chosen dataset row
    # NOTE(review): assumes each row is a mapping with a 'question' key — confirm against load_data().
    sample_question = dataset[row_num]['question']
    logging.info("Sample question: %s", sample_question)

    # Retrieve the most relevant documents for the question
    relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=top_k)
    logging.info("Relevant documents retrieved :%s", len(relevant_docs))

    # Initialize the LLM
    llm = initialize_llm()
    logging.info("LLM initialized")

    # Generate a response grounded in the retrieved documents
    response, source_docs = generate_response(llm, vector_store, sample_question, relevant_docs)
    logging.info("Response generated")
    logging.info("Response from LLM: %s", response)

    # Evaluation: extract attributes from the response/source docs,
    # then compute per-response metrics. The result was previously
    # discarded; log it so the run actually reports its evaluation.
    attributes, total_sentences = extract_attributes(sample_question, source_docs, response)
    metrics = get_metrics(attributes, total_sentences)
    logging.info("Metrics: %s", metrics)

    # Optional full-dataset evaluation (expensive — enable deliberately):
    # compute_rmse_auc_roc_metrics(llm, dataset, vector_store)
# Script entry point. The stray trailing "|" token (a scrape artifact)
# was removed — it made the file a syntax error.
if __name__ == "__main__":
    main()