|
import logging, json |
|
from data.load_dataset import load_data |
|
from retriever.chunk_documents import chunk_documents |
|
from retriever.embed_documents import embed_documents |
|
from retriever.retrieve_documents import retrieve_top_k_documents |
|
from generator.initialize_llm import initialize_llm |
|
from generator.generate_response import generate_response |
|
from generator.extract_attributes import extract_attributes |
|
from generator.compute_metrics import compute_metrics |
|
|
|
|
|
# Configure the root logger once at import time so every pipeline stage below
# emits timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
|
|
def _extract_json_object(text):
    """Return the span of *text* from its first ``{`` to its last ``}``.

    The span is not validated as JSON here. Returns ``""`` when *text* has no
    ``{``, no ``}``, or when the last ``}`` comes before the first ``{``.
    """
    start = text.find("{")
    end = text.rfind("}")
    # str.find / str.rfind return -1 on a miss; using -1 as a slice start
    # would silently slice from the end of the string instead of failing.
    if start == -1 or end < start:
        return ""
    return text[start:end + 1]


def main():
    """Run the RAG pipeline end to end for the first question in the dataset.

    Calls, in order: ``load_data``, ``chunk_documents``, ``embed_documents``,
    ``retrieve_top_k_documents`` (``top_k=5``), ``initialize_llm``,
    ``generate_response``, ``extract_attributes`` and ``compute_metrics``.
    The LLM response, the JSON parsed out of the extracted attributes, and
    the computed metrics are printed to stdout; progress is logged at INFO.

    If the extracted attributes have no content, contain no ``{...}`` span, or
    that span is not valid JSON, the problem is logged and metrics are skipped.
    """
    logging.info("Starting the RAG pipeline")

    dataset = load_data()
    logging.info("Dataset loaded")

    documents = chunk_documents(dataset)
    logging.info("Documents chunked")

    vector_store = embed_documents(documents)
    logging.info("Documents embedded")

    # Only the first dataset entry is used as the query for this run.
    sample_question = dataset[0]['question']
    logging.info("Sample question: %s", sample_question)

    relevant_docs = retrieve_top_k_documents(vector_store, sample_question, top_k=5)
    logging.info("Relevant documents retrieved :%s", len(relevant_docs))

    llm = initialize_llm()
    logging.info("LLM initialized")

    response, source_docs = generate_response(llm, vector_store, sample_question, relevant_docs)
    logging.info("Response generated")

    print(f"Response from LLM: {response}")

    attributes = extract_attributes(sample_question, source_docs, response)

    result_content = attributes.content
    if not result_content:
        # Previously this case was skipped without any trace in the logs.
        logging.warning("Attribute extraction returned no content; skipping metrics")
        return

    # The content may wrap the JSON object in extra text, so isolate the
    # outermost brace-delimited span before parsing.
    json_str = _extract_json_object(result_content)
    if not json_str:
        logging.error("No JSON object found in extracted attributes; skipping metrics")
        return

    # Keep the try body to the one call that can raise JSONDecodeError so that
    # failures elsewhere are not mis-reported as a parse error.
    try:
        result_json = json.loads(json_str)
    except json.JSONDecodeError as e:
        logging.error("JSONDecodeError: %s", e)
        return

    print(json.dumps(result_json, indent=2))

    metrics = compute_metrics(result_json)
    print(metrics)
|
|
|
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()