import logging

import mlflow
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


class ModelEvaluator:
    def __init__(self, model: BaseEstimator, X_test: pd.DataFrame, y_test: pd.Series):
        """
        Initializes the ModelEvaluator with the trained model and the test data.

        Parameters
        ----------
        model : BaseEstimator
            The trained model to evaluate.
        X_test : pd.DataFrame
            The test features.
        y_test : pd.Series
            The true labels for the test set.
        """
        self.model = model
        self.X_test = X_test
        self.y_test = y_test

        self.logger = logging.getLogger(__name__)

    def evaluate_model(self) -> dict:
        """
        Evaluates the model and logs metrics to MLflow.

        Returns
        -------
        dict
            A dictionary containing accuracy, precision, recall, and f1-score.
        """
        # Generate predictions for the held-out test set.
        y_pred = self.model.predict(self.X_test)

        metrics = {
            "accuracy": accuracy_score(self.y_test, y_pred),
            "precision": precision_score(self.y_test, y_pred, average='binary'),
            "recall": recall_score(self.y_test, y_pred, average='binary'),
            "f1_score": f1_score(self.y_test, y_pred, average='binary'),
        }
        # Log the computed metrics to MLflow (a run is created automatically if none is active).
        mlflow.log_metrics(metrics)

        for metric_name, metric_value in metrics.items():
            self.logger.info(f"{metric_name.capitalize()}: {metric_value:.4f}")

        return metrics
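

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal, hedged example of how ModelEvaluator might be driven end to end.
# The synthetic dataset, the LogisticRegression model, and the explicit
# mlflow.start_run() wrapper are assumptions made for illustration only.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    logging.basicConfig(level=logging.INFO)

    # Build a small synthetic binary-classification problem.
    X, y = make_classification(n_samples=500, n_features=10, random_state=42)
    X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
    y = pd.Series(y, name="target")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    # Evaluate inside an explicit MLflow run so the logged metrics are grouped together.
    with mlflow.start_run():
        evaluator = ModelEvaluator(model, X_test, y_test)
        print(evaluator.evaluate_model())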