Delete utils
- utils/__init__.py +0 -20
- utils/api_utils.py +0 -229
- utils/models.py +0 -157
- utils/performance.py +0 -135
utils/__init__.py
DELETED
@@ -1,20 +0,0 @@
-"""
-Utils package initialization.
-
-This package provides utility functions for the AskVeracity fact-checking system.
-"""
-
-from .api_utils import api_error_handler, safe_json_parse, RateLimiter
-from .performance import PerformanceTracker
-from .models import initialize_models, get_nlp_model, get_llm_model
-
-
-__all__ = [
-    'api_error_handler',
-    'safe_json_parse',
-    'RateLimiter',
-    'PerformanceTracker',
-    'initialize_models',
-    'get_nlp_model',
-    'get_llm_model'
-]
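For context, a minimal sketch of how these package-level exports were consumed before the deletion; the call site below is an assumption for illustration, not part of this commit:

    # Hypothetical consumer of the deleted package (assumed, illustration only)
    from utils import initialize_models, PerformanceTracker

    initialize_models()              # load spaCy + ChatOpenAI once at startup
    tracker = PerformanceTracker()   # shared metrics tracker for the app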
utils/api_utils.py
DELETED
@@ -1,229 +0,0 @@
-"""
-API utilities for the Fake News Detector application.
-
-This module provides utilities for handling API calls, rate limiting,
-error handling, and exponential backoff for retrying failed requests.
-"""
-
-import time
-import functools
-import random
-import logging
-import requests
-from datetime import datetime, timedelta
-from collections import deque
-
-from config import RATE_LIMITS, ERROR_BACKOFF
-
-logger = logging.getLogger("misinformation_detector")
-
-class RateLimiter:
-    """
-    Rate limiter for API calls with support for different APIs.
-
-    This class implements a token bucket algorithm for rate limiting,
-    with support for different rate limits for different APIs.
-    It also provides exponential backoff for error handling.
-    """
-
-    def __init__(self):
-        """Initialize the rate limiter with configuration from settings."""
-        # Store rate limits for different APIs
-        self.limits = {}
-
-        # Initialize limits from config
-        for api_name, limit_info in RATE_LIMITS.items():
-            self.limits[api_name] = {
-                "requests": limit_info["requests"],
-                "period": limit_info["period"],
-                "timestamps": deque()
-            }
-
-        # Error backoff settings
-        self.max_retries = ERROR_BACKOFF["max_retries"]
-        self.initial_backoff = ERROR_BACKOFF["initial_backoff"]
-        self.backoff_factor = ERROR_BACKOFF["backoff_factor"]
-
-    def check_and_update(self, api_name):
-        """
-        Check if request is allowed and update timestamps.
-
-        Args:
-            api_name (str): Name of the API to check
-
-        Returns:
-            tuple: (allowed, wait_time)
-                - allowed (bool): Whether the request is allowed
-                - wait_time (float): Time to wait if not allowed
-        """
-        if api_name not in self.limits:
-            return True, 0  # Unknown API, allow by default
-
-        now = datetime.now()
-        limit_info = self.limits[api_name]
-
-        # Remove timestamps older than the period
-        cutoff = now - timedelta(seconds=limit_info["period"])
-        while limit_info["timestamps"] and limit_info["timestamps"][0] < cutoff:
-            limit_info["timestamps"].popleft()
-
-        # Check if we're at the rate limit
-        if len(limit_info["timestamps"]) >= limit_info["requests"]:
-            # Calculate wait time until oldest timestamp expires
-            wait_time = (limit_info["timestamps"][0] + timedelta(seconds=limit_info["period"]) - now).total_seconds()
-            return False, max(0, wait_time)
-
-        # Add current timestamp and allow request
-        limit_info["timestamps"].append(now)
-        return True, 0
-
-    def wait_if_needed(self, api_name):
-        """
-        Wait if rate limit is reached.
-
-        Args:
-            api_name (str): Name of the API to check
-
-        Returns:
-            bool: True if waited, False otherwise
-        """
-        allowed, wait_time = self.check_and_update(api_name)
-        if not allowed:
-            logger.info(f"Rate limit reached for {api_name}. Waiting {wait_time:.2f} seconds...")
-            time.sleep(wait_time + 0.1)  # Add a small buffer
-            return True
-        return False
-
-    def get_backoff_time(self, attempt):
-        """
-        Calculate exponential backoff time with jitter.
-
-        Args:
-            attempt (int): Current attempt number (0-based)
-
-        Returns:
-            float: Backoff time in seconds
-        """
-        backoff = self.initial_backoff * (self.backoff_factor ** attempt)
-        # Add jitter to prevent thundering herd problem
-        jitter = random.uniform(0, 0.1 * backoff)
-        return backoff + jitter
-
-
-# Create rate limiter instance
-rate_limiter = RateLimiter()
-
-# API Error Handler decorator
-def api_error_handler(api_name):
-    """
-    Decorator for API calls with error handling and rate limiting.
-
-    This decorator handles rate limiting, retries with exponential
-    backoff, and error handling for API calls.
-
-    Args:
-        api_name (str): Name of the API being called
-
-    Returns:
-        callable: Decorated function
-    """
-    def decorator(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            try:
-                # Apply rate limiting - make sure rate_limiter exists and has the method
-                if hasattr(rate_limiter, 'wait_if_needed'):
-                    rate_limiter.wait_if_needed(api_name)
-
-                # Track retries
-                for attempt in range(rate_limiter.max_retries):
-                    try:
-                        return func(*args, **kwargs)
-                    except requests.exceptions.HTTPError as e:
-                        status_code = e.response.status_code if hasattr(e, 'response') else 0
-
-                        # Handle specific HTTP errors
-                        if status_code == 429:  # Too Many Requests
-                            logger.warning(f"{api_name} rate limit exceeded (429). Attempt {attempt+1}/{rate_limiter.max_retries}")
-                            # Get retry-after header or use exponential backoff
-                            retry_after = e.response.headers.get('Retry-After')
-                            if retry_after and retry_after.isdigit():
-                                wait_time = int(retry_after)
-                            else:
-                                wait_time = rate_limiter.get_backoff_time(attempt)
-                            logger.info(f"Waiting {wait_time} seconds before retry...")
-                            time.sleep(wait_time)
-                        elif status_code >= 500:  # Server errors
-                            logger.warning(f"{api_name} server error ({status_code}). Attempt {attempt+1}/{rate_limiter.max_retries}")
-                            time.sleep(rate_limiter.get_backoff_time(attempt))
-                        elif status_code == 403:  # Forbidden - likely API key issue
-                            logger.error(f"{api_name} access forbidden (403). Check API key.")
-                            return None  # Don't retry on auth errors
-                        elif status_code == 404:  # Not Found
-                            logger.warning(f"{api_name} resource not found (404).")
-                            return None  # Don't retry on resource not found
-                        else:
-                            logger.error(f"{api_name} HTTP error: {e}")
-                            if attempt < rate_limiter.max_retries - 1:
-                                wait_time = rate_limiter.get_backoff_time(attempt)
-                                logger.info(f"Waiting {wait_time} seconds before retry...")
-                                time.sleep(wait_time)
-                            else:
-                                return None
-
-                    except requests.exceptions.ConnectionError as e:
-                        logger.error(f"{api_name} connection error: {e}")
-                        if attempt < rate_limiter.max_retries - 1:
-                            wait_time = rate_limiter.get_backoff_time(attempt)
-                            logger.info(f"Waiting {wait_time} seconds before retry...")
-                            time.sleep(wait_time)
-                        else:
-                            return None
-
-                    except requests.exceptions.Timeout as e:
-                        logger.error(f"{api_name} timeout error: {e}")
-                        if attempt < rate_limiter.max_retries - 1:
-                            wait_time = rate_limiter.get_backoff_time(attempt)
-                            logger.info(f"Waiting {wait_time} seconds before retry...")
-                            time.sleep(wait_time)
-                        else:
-                            return None
-
-                    except Exception as e:
-                        logger.error(f"{api_name} unexpected error: {str(e)}")
-                        if attempt < rate_limiter.max_retries - 1:
-                            wait_time = rate_limiter.get_backoff_time(attempt)
-                            logger.info(f"Waiting {wait_time} seconds before retry...")
-                            time.sleep(wait_time)
-                        else:
-                            return None
-
-                # If we've exhausted all retries
-                logger.error(f"{api_name} call failed after {rate_limiter.max_retries} attempts")
-                return None
-
-            except Exception as e:
-                # Catch any unexpected errors in the decorator itself
-                logger.error(f"{api_name} decorator error: {str(e)}")
-                return None
-
-        return wrapper
-    return decorator
-
-def safe_json_parse(response, api_name):
-    """
-    Safely parse JSON response with error handling.
-
-    Args:
-        response (requests.Response): Response object to parse
-        api_name (str): Name of the API for logging
-
-    Returns:
-        dict: Parsed JSON or empty dict on error
-    """
-    try:
-        return response.json()
-    except ValueError as e:
-        logger.error(f"Error parsing {api_name} JSON response: {e}")
-        logger.debug(f"Response content: {response.text[:500]}...")
-        return {}
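For reference, a minimal sketch of how api_error_handler and safe_json_parse were typically combined around an HTTP call; the URL and the "newsapi" rate-limit key are assumptions for illustration, not taken from this repository:

    # Hypothetical call site (assumed). The decorator rate-limits and retries;
    # raise_for_status() surfaces HTTPError so the 429/5xx branches apply.
    import requests

    @api_error_handler("newsapi")  # assumed key in config.RATE_LIMITS
    def fetch_articles(url, params=None):
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return safe_json_parse(response, "newsapi")

    data = fetch_articles("https://example.com/api/articles")  # {} or None on failure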
utils/models.py
DELETED
@@ -1,157 +0,0 @@
-"""
-Model management utility for the Fake News Detector application.
-
-This module provides functions for initializing, caching, and
-retrieving language models used throughout the application.
-It ensures models are loaded efficiently and reused appropriately.
-"""
-
-import os
-import logging
-import functools
-from langchain_openai import ChatOpenAI
-import spacy
-
-logger = logging.getLogger("misinformation_detector")
-
-# Global variables for models
-nlp = None
-model = None
-models_initialized = False
-
-# Add caching decorator
-def cached_model(func):
-    """
-    Decorator to cache model loading for improved performance.
-
-    This decorator ensures that models are only loaded once and
-    then reused for subsequent calls, improving performance by
-    avoiding redundant model loading.
-
-    Args:
-        func (callable): Function that loads a model
-
-    Returns:
-        callable: Wrapped function that returns a cached model
-    """
-    cache = {}
-
-    @functools.wraps(func)
-    def wrapper(*args, **kwargs):
-        # Use function name as cache key
-        key = func.__name__
-        if key not in cache:
-            logger.info(f"Model not in cache, calling {key}...")
-            cache[key] = func(*args, **kwargs)
-        return cache[key]
-
-    return wrapper
-
-def initialize_models():
-    """
-    Initialize all required models.
-
-    This function loads and initializes all the language models
-    needed by the application, including spaCy for NLP tasks and
-    OpenAI for LLM-based processing.
-
-    Returns:
-        str: Initialization status message
-
-    Raises:
-        ValueError: If OpenAI API key is not set
-    """
-    global nlp, model, models_initialized
-
-    # Skip initialization if already done
-    if models_initialized:
-        logger.info("Models already initialized, skipping initialization")
-        return "Models already initialized"
-
-    # Check OpenAI API key
-    if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"].strip():
-        logger.error("OPENAI_API_KEY environment variable not set or empty")
-        raise ValueError("OpenAI API key is required. Please set it in the Hugging Face Space secrets.")
-
-    try:
-        # Load NLP model
-        try:
-            logger.info("Loading spaCy NLP model...")
-            nlp = spacy.load("en_core_web_sm")
-            logger.info("Loaded spaCy NLP model")
-        except OSError as e:
-            # This handles the case if the model wasn't installed correctly
-            logger.warning(f"Could not load spaCy model: {str(e)}")
-            logger.info("Attempting to download spaCy model...")
-            try:
-                import subprocess
-                import sys
-                # This downloads the model if it's missing
-                subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
-                # Try loading again
-                nlp = spacy.load("en_core_web_sm")
-                logger.info("Successfully downloaded and loaded spaCy model")
-            except Exception as download_err:
-                logger.error(f"Failed to download spaCy model: {str(download_err)}")
-                # Continue with other initialization, we'll handle missing NLP model elsewhere
-
-        # Set up OpenAI model
-        logger.info("Initializing ChatOpenAI model...")
-        model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
-        logger.info("Initialized ChatOpenAI model")
-
-        # Mark initialization as complete
-        models_initialized = True
-        return "Models initialized successfully"
-
-    except Exception as e:
-        logger.error(f"Error initializing models: {str(e)}")
-        raise e
-
-@cached_model
-def get_nlp_model():
-    """
-    Get the spaCy NLP model, initializing if needed.
-
-    This function returns a cached spaCy model for NLP tasks.
-    If the model hasn't been loaded yet, it will be loaded.
-
-    Returns:
-        spacy.Language: Loaded spaCy model
-    """
-    global nlp
-    if nlp is None:
-        try:
-            # Try to load just the spaCy model if not loaded yet
-            logger.info("Loading spaCy NLP model...")
-            nlp = spacy.load("en_core_web_sm")
-            logger.info("Loaded spaCy NLP model")
-        except Exception as e:
-            logger.error(f"Error loading spaCy model: {str(e)}")
-            # Fall back to full initialization
-            initialize_models()
-    return nlp
-
-@cached_model
-def get_llm_model():
-    """
-    Get the ChatOpenAI model, initializing if needed.
-
-    This function returns a cached OpenAI LLM model.
-    If the model hasn't been loaded yet, it will be loaded.
-
-    Returns:
-        ChatOpenAI: Loaded LLM model
-    """
-    global model
-    if model is None:
-        try:
-            # Try to load just the LLM model if not loaded yet
-            logger.info("Initializing ChatOpenAI model...")
-            model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
-            logger.info("Initialized ChatOpenAI model")
-        except Exception as e:
-            logger.error(f"Error initializing ChatOpenAI model: {str(e)}")
-            # Fall back to full initialization
-            initialize_models()
-    return model
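A minimal sketch of the cached-accessor pattern these helpers enabled; the claim text and prompt below are illustrative assumptions:

    # Hypothetical usage (assumed): cached accessors avoid reloading models.
    nlp = get_nlp_model()                    # cached spaCy pipeline
    doc = nlp("NASA launched a new satellite on Monday.")
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    llm = get_llm_model()                    # cached ChatOpenAI instance
    reply = llm.invoke("Is this claim check-worthy? " + doc.text)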
utils/performance.py
DELETED
@@ -1,135 +0,0 @@
-"""
-Performance tracking utility for the Fake News Detector application.
-
-This module provides functionality to track and analyze the
-performance of the application, including processing times,
-success rates, and resource utilization.
-"""
-
-import time
-import logging
-
-logger = logging.getLogger("misinformation_detector")
-
-class PerformanceTracker:
-    """
-    Tracks and logs performance metrics for the fact-checking system.
-
-    This class maintains counters and statistics for various performance
-    metrics, such as processing times, evidence retrieval success rates,
-    and confidence scores.
-    """
-
-    def __init__(self):
-        """Initialize the performance tracker with empty metrics."""
-        self.metrics = {
-            "claims_processed": 0,
-            "evidence_retrieval_success_rate": [],
-            "processing_times": [],
-            "confidence_scores": [],
-            "source_types_used": {},
-            "temporal_relevance": []
-        }
-
-    def log_claim_processed(self):
-        """
-        Increment the counter for processed claims.
-        This should be called whenever a claim is processed successfully.
-        """
-        self.metrics["claims_processed"] += 1
-
-    def log_evidence_retrieval(self, success, sources_count):
-        """
-        Log the success or failure of evidence retrieval.
-
-        Args:
-            success (bool): Whether evidence retrieval was successful
-            sources_count (dict): Count of evidence items by source type
-        """
-        # Ensure success is a boolean
-        success_value = 1 if success else 0
-        self.metrics["evidence_retrieval_success_rate"].append(success_value)
-
-        # Safely process source types
-        if isinstance(sources_count, dict):
-            for source_type, count in sources_count.items():
-                # Ensure source_type is a string and count is an integer
-                source_type = str(source_type)
-                try:
-                    count = int(count)
-                except (ValueError, TypeError):
-                    count = 1
-
-                # Update source types used
-                self.metrics["source_types_used"][source_type] = \
-                    self.metrics["source_types_used"].get(source_type, 0) + count
-
-    def log_processing_time(self, start_time):
-        """
-        Log the processing time for an operation.
-
-        Args:
-            start_time (float): Start time obtained from time.time()
-        """
-        end_time = time.time()
-        processing_time = end_time - start_time
-        self.metrics["processing_times"].append(processing_time)
-
-    def log_confidence_score(self, score):
-        """
-        Log a confidence score.
-
-        Args:
-            score (float): Confidence score between 0 and 1
-        """
-        # Ensure score is a float between 0 and 1
-        try:
-            score = float(score)
-            if 0 <= score <= 1:
-                self.metrics["confidence_scores"].append(score)
-        except (ValueError, TypeError):
-            logger.warning(f"Invalid confidence score: {score}")
-
-    def log_temporal_relevance(self, relevance_score):
-        """
-        Log a temporal relevance score.
-
-        Args:
-            relevance_score (float): Temporal relevance score between 0 and 1
-        """
-        # Ensure relevance score is a float between 0 and 1
-        try:
-            relevance_score = float(relevance_score)
-            if 0 <= relevance_score <= 1:
-                self.metrics["temporal_relevance"].append(relevance_score)
-        except (ValueError, TypeError):
-            logger.warning(f"Invalid temporal relevance score: {relevance_score}")
-
-    def get_summary(self):
-        """
-        Get a summary of all performance metrics.
-
-        Returns:
-            dict: Summary of performance metrics
-        """
-        # Safely calculate averages with error handling
-        def safe_avg(metric_list):
-            try:
-                return sum(metric_list) / max(len(metric_list), 1)
-            except (TypeError, ValueError):
-                return 0.0
-
-        return {
-            "claims_processed": self.metrics["claims_processed"],
-            "avg_evidence_retrieval_success_rate": safe_avg(self.metrics["evidence_retrieval_success_rate"]),
-            "avg_processing_time": safe_avg(self.metrics["processing_times"]),
-            "avg_confidence_score": safe_avg(self.metrics["confidence_scores"]),
-            "source_types_used": dict(self.metrics["source_types_used"]),
-            "avg_temporal_relevance": safe_avg(self.metrics["temporal_relevance"])
-        }
-
-    def reset(self):
-        """Reset all performance metrics."""
-        self.__init__()
-        logger.info("Performance metrics have been reset")
-        return "Performance metrics reset successfully"