Gourisankar Padihary committed
Commit: 9bde774
Parent(s): cfb3435

code optimization, added sleep between call to groq

Files changed:
- generator/compute_metrics.py  +1 -1
- generator/compute_rmse_auc_roc_metrics.py  +11 -7
- generator/extract_attributes.py  +3 -7
- generator/generate_metrics.py  +11 -5
- generator/generate_response.py  +9 -4
- main.py  +9 -5
generator/compute_metrics.py  CHANGED

@@ -43,7 +43,7 @@ def get_metrics(attributes, total_sentences):
         result_json = json.loads(json_str)
         # Compute metrics using the extracted attributes
         metrics = compute_metrics(result_json, total_sentences)
-
+        logging.info(metrics)
         return metrics
     except json.JSONDecodeError as e:
         logging.error(f"JSONDecodeError: {e}")
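For orientation, here is a small self-contained sketch of the parse-then-compute pattern this hunk sits in; the compute_metrics stand-in below is invented for the example and is not the repo's implementation.

import json
import logging

logging.basicConfig(level=logging.INFO)

def compute_metrics(result_json: dict, total_sentences: int) -> dict:
    # Stand-in for the repo's compute_metrics: it just echoes a couple of fields.
    return {"Context Relevance": result_json.get("relevance_score", 0.0),
            "Total Sentences": total_sentences}

def get_metrics(json_str: str, total_sentences: int):
    try:
        result_json = json.loads(json_str)
        metrics = compute_metrics(result_json, total_sentences)
        logging.info(metrics)  # the logging call added by this commit
        return metrics
    except json.JSONDecodeError as e:
        logging.error(f"JSONDecodeError: {e}")
        return None

# A well-formed payload is logged and returned; malformed JSON is caught.
get_metrics('{"relevance_score": 0.8}', 12)
get_metrics('not json', 12)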
generator/compute_rmse_auc_roc_metrics.py  CHANGED

@@ -3,7 +3,7 @@ from sklearn.metrics import roc_auc_score, root_mean_squared_error
 from generator.generate_metrics import generate_metrics
 import logging
 
-def compute_rmse_auc_roc_metrics(llm, dataset, vector_store, num_question):
+def compute_rmse_auc_roc_metrics(gen_llm, val_llm, dataset, vector_store, num_question):
 
     # Lists to accumulate ground truths and predictions for AUC-ROC computation
     all_ground_truth_relevance = []
@@ -25,7 +25,7 @@ def compute_rmse_auc_roc_metrics(llm, dataset, vector_store, num_question):
         query = document['question']
         logging.info(f'Query number: {i + 1}')
         # Call the generate_metrics for each query
-        metrics = generate_metrics(
+        metrics = generate_metrics(gen_llm, val_llm, vector_store, query)
 
         # Extract predicted metrics (ensure these are continuous if possible)
         predicted_relevance = metrics.get('Context Relevance', 0) if metrics else 0
@@ -46,22 +46,26 @@ def compute_rmse_auc_roc_metrics(llm, dataset, vector_store, num_question):
 
     # === Compute RMSE & AUC-ROC for the Entire Dataset ===
     try:
+        logging.info(f"All Ground Truth Relevance: {all_ground_truth_relevance}")
+        logging.info(f"All Predicted Relevance: {all_predicted_relevance}")
         relevance_rmse = root_mean_squared_error(all_ground_truth_relevance, all_predicted_relevance)
     except ValueError:
         relevance_rmse = None
 
     try:
+        logging.info(f"All Ground Truth Utilization: {all_ground_truth_utilization}")
+        logging.info(f"All Predicted Utilization: {all_predicted_utilization}")
         utilization_rmse = root_mean_squared_error(all_ground_truth_utilization, all_predicted_utilization)
     except ValueError:
         utilization_rmse = None
 
     try:
-
-
+        logging.info(f"All Ground Truth Adherence: {all_ground_truth_adherence}")
+        logging.info(f"All Predicted Adherence: {all_predicted_adherence}")
         adherence_auc = roc_auc_score(all_ground_truth_adherence, all_predicted_adherence)
     except ValueError:
         adherence_auc = None
 
-
-
-
+    logging.info(f"Relevance RMSE score: {relevance_rmse}")
+    logging.info(f"Utilization RMSE score: {utilization_rmse}")
+    logging.info(f"Overall Adherence AUC-ROC: {adherence_auc}")
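For reference, the sklearn calls that the try/except ValueError blocks above guard behave as in this standalone sketch; the numbers are made up for illustration and are not the repo's data.

import logging
from sklearn.metrics import roc_auc_score, root_mean_squared_error  # sklearn >= 1.4

logging.basicConfig(level=logging.INFO)

ground_truth_relevance = [0.9, 0.4, 0.7]
predicted_relevance = [0.8, 0.5, 0.6]
ground_truth_adherence = [1, 0, 1]        # binary labels
predicted_adherence = [0.9, 0.2, 0.7]     # predicted scores

try:
    relevance_rmse = root_mean_squared_error(ground_truth_relevance, predicted_relevance)
except ValueError:
    # Raised e.g. for empty or mismatched-length inputs.
    relevance_rmse = None

try:
    adherence_auc = roc_auc_score(ground_truth_adherence, predicted_adherence)
except ValueError:
    # Raised e.g. when only one class is present in the ground truth.
    adherence_auc = None

logging.info(f"Relevance RMSE score: {relevance_rmse}")
logging.info(f"Overall Adherence AUC-ROC: {adherence_auc}")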
generator/extract_attributes.py  CHANGED

@@ -1,9 +1,8 @@
 from generator.create_prompt import create_prompt
-from generator.initialize_llm import initialize_validation_llm
-from generator.document_utils import Document, apply_sentence_keys_documents, apply_sentence_keys_response
+from generator.document_utils import apply_sentence_keys_documents, apply_sentence_keys_response
 
 # Function to extract attributes
-def extract_attributes(question, relevant_docs, response):
+def extract_attributes(val_llm, question, relevant_docs, response):
     # Format documents into a string by accessing the `page_content` attribute of each Document
     #formatted_documents = "\n".join([f"Doc {i+1}: {doc.page_content}" for i, doc in enumerate(relevant_docs)])
     formatted_documents = apply_sentence_keys_documents(relevant_docs)
@@ -21,10 +20,7 @@ def extract_attributes(question, relevant_docs, response):
 
     attribute_prompt = create_prompt(formatted_documents, question, formatted_responses)
 
-    # Initialize the LLM
-    llm_val = initialize_validation_llm()
-
     # Instead of using BaseMessage, pass the formatted prompt directly to invoke
-    result = llm_val.invoke(attribute_prompt)
+    result = val_llm.invoke(attribute_prompt)
 
     return result, total_sentences
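The point of this refactor is dependency injection: the caller creates the validation LLM once and passes it in, instead of extract_attributes building a fresh client on every call. A minimal self-contained sketch of that pattern follows; the fake client and the inline prompt format are invented for illustration and do not reproduce the repo's create_prompt.

class FakeValidationLLM:
    # Stand-in for the real validation LLM client; invoke() mimics the
    # prompt-in / text-out interface used in the diff above.
    def invoke(self, prompt: str) -> str:
        return f"validated: {prompt[:40]}..."

def extract_attributes_sketch(val_llm, question, relevant_docs, response):
    # Build a prompt from the inputs (the repo uses create_prompt for this).
    prompt = f"Q: {question}\nDocs: {len(relevant_docs)}\nA: {response}"
    result = val_llm.invoke(prompt)        # injected client, no construction here
    total_sentences = len(relevant_docs)   # placeholder for the real sentence count
    return result, total_sentences

val_llm = FakeValidationLLM()              # created once by the caller
print(extract_attributes_sketch(val_llm, "What is RAG?", ["doc1", "doc2"], "An answer."))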
generator/generate_metrics.py  CHANGED

@@ -1,29 +1,35 @@
 import logging
+import time
 from generator.generate_response import generate_response
 from retriever.retrieve_documents import retrieve_top_k_documents
 from generator.compute_metrics import get_metrics
 from generator.extract_attributes import extract_attributes
 
-def generate_metrics(
+def generate_metrics(gen_llm, val_llm, vector_store, query):
     logging.info(f'Query: {query}')
 
     # Step 1: Retrieve relevant documents for given query
     relevant_docs = retrieve_top_k_documents(vector_store, query, top_k=5)
-    logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
+    #logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
 
     # Log each retrieved document individually
     #for i, doc in enumerate(relevant_docs):
         #logging.info(f"Relevant document {i+1}: {doc} \n")
 
     # Step 2: Generate a response using LLM
-    response, source_docs = generate_response(
+    response, source_docs = generate_response(gen_llm, vector_store, query, relevant_docs)
 
     logging.info(f"Response from LLM: {response}")
 
+    # Add a sleep interval to avoid hitting the rate limit
+    time.sleep(20) # Adjust the sleep time as needed
+
     # Step 3: Extract attributes and total sentences for each query
-    attributes
+    logging.info(f"Extracting attributes through validation LLM")
+    attributes, total_sentences = extract_attributes(val_llm, query, source_docs, response)
+    logging.info(f"Extracted attributes successfully")
 
-    # Call the
+    # Step 4 : Call the get metrics calculate metrics
     metrics = get_metrics(attributes, total_sentences)
 
     return metrics
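The commit message's "sleep between call to groq" is the time.sleep(20) above: a fixed pause between the generation request and the validation request so two back-to-back Groq calls do not hit the provider's rate limit. A self-contained sketch of that pacing idea, with a stand-in function instead of a real Groq client:

import time

SLEEP_BETWEEN_CALLS = 20  # seconds, the value used in the diff above

def call_llm(name: str, prompt: str) -> str:
    # Stand-in for a real Groq request.
    return f"{name} answered: {prompt}"

def generate_then_validate(prompt: str) -> tuple[str, str]:
    answer = call_llm("generation LLM", prompt)
    time.sleep(SLEEP_BETWEEN_CALLS)   # pause before the next rate-limited request
    verdict = call_llm("validation LLM", answer)
    return answer, verdict

print(generate_then_validate("Summarise the retrieved documents."))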
generator/generate_response.py  CHANGED

@@ -7,7 +7,12 @@ def generate_response(llm, vector_store, question, relevant_docs):
         retriever=vector_store.as_retriever(),
         return_source_documents=True
     )
-
-
-
-
+    try:
+        result = qa_chain.invoke(question, documents=relevant_docs)
+        response = result['result']
+        source_docs = result['source_documents']
+        return response, source_docs
+    except Exception as e:
+        print(f"Error during QA chain invocation: {e}")
+        raise e
+
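The new try block assumes the chain returns a mapping with 'result' and 'source_documents' keys, which is what LangChain's RetrievalQA produces when return_source_documents=True. A self-contained sketch of unpacking that shape; the dict below is fabricated for the example.

def unpack_qa_result(result: dict) -> tuple[str, list]:
    try:
        response = result['result']                # the generated answer text
        source_docs = result['source_documents']   # documents used as context
        return response, source_docs
    except KeyError as e:
        raise RuntimeError(f"Unexpected QA chain output, missing key: {e}")

fake_result = {"result": "Paris is the capital of France.",
               "source_documents": ["doc snippet 1", "doc snippet 2"]}
print(unpack_qa_result(fake_result))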
main.py  CHANGED

@@ -4,7 +4,8 @@ from generator.compute_rmse_auc_roc_metrics import compute_rmse_auc_roc_metrics
 from retriever.chunk_documents import chunk_documents
 from retriever.embed_documents import embed_documents
 from generator.generate_metrics import generate_metrics
-from generator.initialize_llm import
+from generator.initialize_llm import initialize_generation_llm
+from generator.initialize_llm import initialize_validation_llm
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -29,17 +30,20 @@ def main():
     logging.info("Documents embedded")
 
     # Initialize the Generation LLM
-
+    gen_llm = initialize_generation_llm()
+
+    # Initialize the Validation LLM
+    val_llm = initialize_validation_llm()
 
     # Sample question
     row_num = 10
-
+    query = dataset[row_num]['question']
 
     # Call generate_metrics for above sample question
-    generate_metrics(
+    generate_metrics(gen_llm, val_llm, vector_store, query)
 
     #Compute RMSE and AUC-ROC for entire dataset
-    compute_rmse_auc_roc_metrics(
+    compute_rmse_auc_roc_metrics(gen_llm, val_llm, dataset, vector_store, 10)
 
     logging.info("Finished!!!")
 
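generator/initialize_llm.py itself is not part of this diff, so the two initializers are only known by name. A guess at their shape, assuming langchain_groq's ChatGroq client with placeholder model names and GROQ_API_KEY taken from the environment; the real module may differ.

from langchain_groq import ChatGroq  # reads GROQ_API_KEY from the environment

def initialize_generation_llm():
    # Groq-hosted model used to generate answers (model name is a placeholder).
    return ChatGroq(model="llama3-8b-8192", temperature=0)

def initialize_validation_llm():
    # Separate model used to grade the generated answers (placeholder name).
    return ChatGroq(model="llama3-70b-8192", temperature=0)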