# streamlit_app/src/model_evaluation.py
import logging
import mlflow
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
from sklearn.base import BaseEstimator
# from zenml.client import Client
# experiment_tracker = Client().active_stack.experiment_tracker
class ModelEvaluator:
    def __init__(self, model: BaseEstimator, X_test: pd.DataFrame, y_test: pd.Series):
        """
        Initializes the ModelEvaluator with a trained model and test data.

        Parameters
        ----------
        model : BaseEstimator
            The trained model to evaluate.
        X_test : pd.DataFrame
            The test features.
        y_test : pd.Series
            The true labels for the test set.
        """
        self.model = model
        self.X_test = X_test
        self.y_test = y_test
        self.logger = logging.getLogger(__name__)

    def evaluate_model(self) -> dict:
        """
        Evaluates the model on the test set and logs the metrics.

        Returns
        -------
        dict
            A dictionary containing accuracy, precision, recall, and f1-score.
        """
        # Set experiment explicitly
        # mlflow.set_experiment(self.experiment_name)
        # with mlflow.start_run():
        y_pred = self.model.predict(self.X_test)

        # Calculate evaluation metrics (binary classification assumed)
        metrics = {
            "accuracy": accuracy_score(self.y_test, y_pred),
            "precision": precision_score(self.y_test, y_pred, average='binary'),
            "recall": recall_score(self.y_test, y_pred, average='binary'),
            "f1_score": f1_score(self.y_test, y_pred, average='binary')
        }

        # # Log evaluation metrics to MLflow
        # for metric_name, metric_value in metrics.items():
        #     mlflow.log_metric(metric_name, metric_value)

        for metric_name, metric_value in metrics.items():
            # mlflow.log_metric(metric_name, metric_value)
            self.logger.info(f"{metric_name.capitalize()}: {metric_value:.4f}")
        return metrics
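

# Example usage (illustrative sketch, not part of the original pipeline):
# the synthetic dataset and LogisticRegression model below are assumptions
# chosen purely to demonstrate the ModelEvaluator API.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    logging.basicConfig(level=logging.INFO)

    # Build a small synthetic binary-classification problem.
    X, y = make_classification(n_samples=200, n_features=10, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(
        pd.DataFrame(X), pd.Series(y), test_size=0.2, random_state=42
    )

    # Train a simple classifier and evaluate it with ModelEvaluator.
    model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
    evaluator = ModelEvaluator(model, X_test, y_test)
    metrics = evaluator.evaluate_model()

    # If MLflow tracking is desired, the metrics dict can be logged in one call
    # (assumes an MLflow tracking URI is configured):
    # with mlflow.start_run():
    #     mlflow.log_metrics(metrics)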