import logging

import mlflow
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


class ModelEvaluator:
    def __init__(self, model: BaseEstimator, X_test: pd.DataFrame, y_test: pd.Series):
        """
        Initializes the ModelEvaluator with the trained model and the test data.

        Parameters
        ----------
        model : BaseEstimator
            The trained model to evaluate.
        X_test : pd.DataFrame
            The test features.
        y_test : pd.Series
            The true labels for the test set.
        """
        self.model = model
        self.X_test = X_test
        self.y_test = y_test

        self.logger = logging.getLogger(__name__)

    def evaluate_model(self) -> dict:
        """
        Evaluates the model and logs metrics to MLflow.

        Returns
        -------
        dict
            A dictionary containing accuracy, precision, recall, and f1-score.
        """
        # Generate predictions for the held-out test set.
        y_pred = self.model.predict(self.X_test)

        metrics = {
            "accuracy": accuracy_score(self.y_test, y_pred),
            "precision": precision_score(self.y_test, y_pred, average='binary'),
            "recall": recall_score(self.y_test, y_pred, average='binary'),
            "f1_score": f1_score(self.y_test, y_pred, average='binary'),
        }
        # Log the computed metrics to MLflow (a run is created automatically if none is active).
        mlflow.log_metrics(metrics)

        for metric_name, metric_value in metrics.items():
            self.logger.info(f"{metric_name.capitalize()}: {metric_value:.4f}")

        return metrics
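

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal, hedged example of how ModelEvaluator might be driven end to end.
# The synthetic dataset, the LogisticRegression model, and the explicit
# mlflow.start_run() wrapper are assumptions made for illustration only.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    logging.basicConfig(level=logging.INFO)

    # Build a small synthetic binary-classification problem.
    X, y = make_classification(n_samples=500, n_features=10, random_state=42)
    X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
    y = pd.Series(y, name="target")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    # Evaluate inside an explicit MLflow run so the logged metrics are grouped together.
    with mlflow.start_run():
        evaluator = ModelEvaluator(model, X_test, y_test)
        print(evaluator.evaluate_model())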