gourisankar85 committed
Commit db7e2f6 · verified · 1 Parent(s): 07aab40

Upload 2 files

generator/document_utils.py CHANGED
@@ -1,35 +1,56 @@
- from typing import List
-
- class Document:
-     def __init__(self, metadata, page_content):
-         self.metadata = metadata
-         self.page_content = page_content
-
- def apply_sentence_keys_documents(relevant_docs: List[Document]):
-     result = []
-     '''for i, doc in enumerate(relevant_docs):
-         doc_id = str(i)
-         title_passage = doc.page_content.split('\nPassage: ')
-         title = title_passage[0]
-         passages = title_passage[1].split('. ')
-
-         doc_result = []
-         doc_result.append([f"{doc_id}a", title])
-
-         for j, passage in enumerate(passages):
-             doc_result.append([f"{doc_id}{chr(98 + j)}", passage])
-
-         result.append(doc_result)'''
-
-     for relevant_doc_index, relevant_doc in enumerate(relevant_docs):
-         sentences = []
-         for sentence_index, sentence in enumerate(relevant_doc.page_content.split(".")):
-             sentences.append([str(relevant_doc_index) + chr(97 + sentence_index), sentence])
-         result.append(sentences)
-
-     return result
-
- def apply_sentence_keys_response(input_string):
-     sentences = input_string.split('. ')
-     result = [[chr(97 + i), sentence] for i, sentence in enumerate(sentences)]
-     return result
+ import logging
+ from typing import List
+
+ logs = []
+ class Document:
+     def __init__(self, metadata, page_content):
+         self.metadata = metadata
+         self.page_content = page_content
+
+ def apply_sentence_keys_documents(relevant_docs: List[Document]):
+     result = []
+     '''for i, doc in enumerate(relevant_docs):
+         doc_id = str(i)
+         title_passage = doc.page_content.split('\nPassage: ')
+         title = title_passage[0]
+         passages = title_passage[1].split('. ')
+
+         doc_result = []
+         doc_result.append([f"{doc_id}a", title])
+
+         for j, passage in enumerate(passages):
+             doc_result.append([f"{doc_id}{chr(98 + j)}", passage])
+
+         result.append(doc_result)'''
+
+     for relevant_doc_index, relevant_doc in enumerate(relevant_docs):
+         sentences = []
+         for sentence_index, sentence in enumerate(relevant_doc.page_content.split(".")):
+             sentences.append([str(relevant_doc_index) + chr(97 + sentence_index), sentence])
+         result.append(sentences)
+
+     return result
+
+ def apply_sentence_keys_response(input_string):
+     sentences = input_string.split('. ')
+     result = [[chr(97 + i), sentence] for i, sentence in enumerate(sentences)]
+     return result
+
+ def initialize_logging():
+     logger = logging.getLogger()
+     logger.setLevel(logging.INFO)
+
+     # Custom log handler to capture logs and add them to the logs list
+     class LogHandler(logging.Handler):
+         def emit(self, record):
+             log_entry = self.format(record)
+             logs.append(log_entry)
+
+     # Add the custom log handler to the logger
+     log_handler = LogHandler()
+     log_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
+     logger.addHandler(log_handler)
+
+ def get_logs():
+     """Retrieve logs for display."""
+     return "\n".join(logs[-100:])  # Show only the last 100 log entries
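For context, a minimal usage sketch of the helpers added in this file (the sample page_content string, the log message, and the query values below are illustrative only, not from the repo):

    import logging
    from generator.document_utils import (
        Document,
        apply_sentence_keys_documents,
        apply_sentence_keys_response,
        initialize_logging,
        get_logs,
    )

    # Route root-logger records into the module-level `logs` list.
    initialize_logging()
    logging.info("Pipeline started")

    # Sentences of document i get keys "ia", "ib", ... (doc index + letter).
    docs = [Document(metadata={}, page_content="First fact. Second fact.")]
    print(apply_sentence_keys_documents(docs))
    # -> [[['0a', 'First fact'], ['0b', ' Second fact'], ['0c', '']]]

    # Response sentences get plain letter keys "a", "b", ...
    print(apply_sentence_keys_response("Part one. Part two."))
    # -> [['a', 'Part one'], ['b', 'Part two.']]

    print(get_logs())  # the last 100 captured log entries

Note that splitting on "." keeps the empty trailing fragment, which is why '0c' above maps to an empty string.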
generator/generate_metrics.py CHANGED
@@ -1,39 +1,39 @@
- import logging
- import time
- from generator.generate_response import generate_response
- from retriever.retrieve_documents import retrieve_top_k_documents
- from generator.compute_metrics import get_metrics
- from generator.extract_attributes import extract_attributes
-
- def retrieve_and_generate_response(gen_llm, vector_store, query):
-     logging.info(f'Query: {query}')
-
-     # Step 1: Retrieve relevant documents for the given query
-     relevant_docs = retrieve_top_k_documents(vector_store, query, top_k=5)
-     #logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
-
-     # Log each retrieved document individually
-     #for i, doc in enumerate(relevant_docs):
-         #logging.info(f"Relevant document {i+1}: {doc} \n")
-
-     # Step 2: Generate a response using the LLM
-     response, source_docs = generate_response(gen_llm, vector_store, query, relevant_docs)
-
-     logging.info(f"Response from LLM: {response}")
-
-     return response, source_docs
-
- def generate_metrics(val_llm, response, source_docs, query, time_to_wait):
-
-     # Add a sleep interval to avoid hitting the rate limit
-     time.sleep(time_to_wait)  # Adjust the sleep time as needed
-
-     # Step 3: Extract attributes and total sentences for each query
-     logging.info("Extracting attributes through validation LLM")
-     attributes, total_sentences = extract_attributes(val_llm, query, source_docs, response)
-     logging.info("Extracted attributes successfully")
-
-     # Step 4: Call get_metrics to calculate the metrics
-     metrics = get_metrics(attributes, total_sentences)
-
+ import logging
+ import time
+ from generator.generate_response import generate_response
+ from retriever.retrieve_documents import retrieve_top_k_documents
+ from generator.compute_metrics import get_metrics
+ from generator.extract_attributes import extract_attributes
+
+ def retrieve_and_generate_response(gen_llm, vector_store, query):
+     logging.info(f'Query: {query}')
+
+     # Step 1: Retrieve relevant documents for the given query
+     relevant_docs = retrieve_top_k_documents(vector_store, query, top_k=5)
+     #logging.info(f"Relevant documents retrieved :{len(relevant_docs)}")
+
+     # Log each retrieved document individually
+     #for i, doc in enumerate(relevant_docs):
+         #logging.info(f"Relevant document {i+1}: {doc} \n")
+
+     # Step 2: Generate a response using the LLM
+     response, source_docs = generate_response(gen_llm, vector_store, query, relevant_docs)
+
+     logging.info(f"Response from LLM ({gen_llm.name}): {response}")
+
+     return response, source_docs
+
+ def generate_metrics(val_llm, response, source_docs, query, time_to_wait):
+
+     # Add a sleep interval to avoid hitting the rate limit
+     time.sleep(time_to_wait)  # Adjust the sleep time as needed
+
+     # Step 3: Extract attributes and total sentences for each query
+     logging.info("Extracting attributes through validation LLM")
+     attributes, total_sentences = extract_attributes(val_llm, query, source_docs, response)
+     logging.info("Extracted attributes successfully")
+
+     # Step 4: Call get_metrics to calculate the metrics
+     metrics = get_metrics(attributes, total_sentences)
+
      return attributes, metrics
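And a hedged end-to-end sketch of how these two functions are meant to be chained. Here gen_llm, val_llm, vector_store, and the query string are placeholders built elsewhere in the app, and the new log line implies gen_llm exposes a .name attribute; the time_to_wait value is an assumed example:

    from generator.generate_metrics import (
        retrieve_and_generate_response,
        generate_metrics,
    )

    # gen_llm, val_llm, and vector_store are assumed to come from the
    # app's setup code; they are placeholders in this sketch.
    query = "..."  # user question

    # Steps 1-2: retrieve the top-5 documents and generate an answer.
    response, source_docs = retrieve_and_generate_response(gen_llm, vector_store, query)

    # Steps 3-4: sleep to respect the validation LLM's rate limit,
    # then extract attributes and compute the metrics.
    attributes, metrics = generate_metrics(
        val_llm, response, source_docs, query, time_to_wait=30  # seconds; assumed value
    )
    print(metrics)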