Gourisankar Padihary committed
Commit 0ea6d19 · 1 Parent(s): 9bde774

Changes for techqa data set

generator/generate_metrics.py CHANGED
@@ -22,7 +22,7 @@ def generate_metrics(gen_llm, val_llm, vector_store, query):
     logging.info(f"Response from LLM: {response}")
 
     # Add a sleep interval to avoid hitting the rate limit
-    time.sleep(20)  # Adjust the sleep time as needed
+    time.sleep(25)  # Adjust the sleep time as needed
 
     # Step 3: Extract attributes and total sentences for each query
     logging.info(f"Extracting attributes through validation LLM")
generator/initialize_llm.py CHANGED
@@ -2,7 +2,7 @@ import logging
 import os
 from langchain_groq import ChatGroq
 
-def initialize_llm():
+def initialize_generation_llm():
     os.environ["GROQ_API_KEY"] = "your_groq_api_key"
     model_name = "llama3-8b-8192"
     llm = ChatGroq(model=model_name, temperature=0.7)
@@ -11,7 +11,7 @@ def initialize_llm():
 
 def initialize_validation_llm():
     os.environ["GROQ_API_KEY"] = "your_groq_api_key"
-    model_name = "llama-3.1-8b-instant"
+    model_name = "llama3-70b-8192"
     llm = ChatGroq(model=model_name, temperature=0.7)
     logging.info(f'Validation LLM {model_name} initialized')
     return llm
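Two changes here: the generation initializer is renamed from initialize_llm to initialize_generation_llm, and the validation model moves from llama-3.1-8b-instant to the larger llama3-70b-8192, so a stronger model now judges what the 8B model generates. Assuming the module layout above, callers wire the pair up like this (sketch only):

from generator.initialize_llm import (
    initialize_generation_llm,
    initialize_validation_llm,
)

gen_llm = initialize_generation_llm()  # llama3-8b-8192, produces the answers
val_llm = initialize_validation_llm()  # llama3-70b-8192, extracts/validates attributes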
main.py CHANGED
@@ -12,7 +12,7 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 
 def main():
     logging.info("Starting the RAG pipeline")
-    data_set_name = 'covidqa'
+    data_set_name = 'techqa'
 
     # Load the dataset
     dataset = load_data(data_set_name)
@@ -36,7 +36,7 @@ def main():
     val_llm = initialize_validation_llm()
 
     # Sample question
-    row_num = 10
+    row_num = 7
     query = dataset[row_num]['question']
 
     # Call generate_metrics for above sample question
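The run configuration switches from covidqa row 10 to techqa row 7, both hardcoded in main(). Since every dataset switch currently means editing main.py, a small argparse front end would make the same change a command-line flag; this is a suggestion, not part of the commit:

import argparse

parser = argparse.ArgumentParser(description="Run the RAG pipeline on one sample question")
parser.add_argument("--dataset", default="techqa", help="dataset name, e.g. covidqa or techqa")
parser.add_argument("--row", type=int, default=7, help="row index of the sample question")
args = parser.parse_args()

data_set_name = args.dataset
row_num = args.row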
retriever/embed_documents.py CHANGED
@@ -2,6 +2,6 @@ from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 
 def embed_documents(documents):
-    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
     vector_store = FAISS.from_texts([doc['text'] for doc in documents], embedding_model)
     return vector_store
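The embedding model swaps from all-MiniLM-L6-v2 (6 transformer layers) to paraphrase-MiniLM-L3-v2 (3 layers); both produce 384-dimensional vectors, so the FAISS index shape is unchanged, but the L3 model embeds roughly twice as fast at some cost in retrieval quality. A quick smoke test of the function above (the two document texts are made-up examples in the spirit of TechQA's IBM support content):

from retriever.embed_documents import embed_documents

docs = [
    {"text": "Restart the WebSphere application server from the admin console."},
    {"text": "Schedule nightly DB2 database backups with the backup utility."},
]
store = embed_documents(docs)
hits = store.similarity_search("how do I restart WebSphere?", k=1)
print(hits[0].page_content)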