|
from zenml import pipeline |
|
from zenml import Model |
|
from steps.data_ingestion_step import data_ingestion_step |
|
from steps.data_preprocessing_step import data_preprocessing_step |
|
from steps.outlier_detection_step import outlier_detection_step |
|
from steps.data_splitting_step import data_splitter_step |
|
from steps.model_building_step import model_builder_step |
|
from steps.model_evaluation_step import model_evaluation_step |
|
import logging |
|
import warnings |
|
|
|
|
|
# Suppress UserWarning messages attributed to modules matching "mlflow" so
# they do not clutter the pipeline run output.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="mlflow",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pipeline(
    model=Model(
        name="customer_churn_prediction",
    )
)
def training_pipeline(
    data_path: str = (
        "/home/sarath_kumar/customer_chrun_prediction/data/"
        "customer_churn_dataset-training-master.csv.zip"
    ),
):
    """Define the end-to-end training pipeline for customer churn prediction.

    The steps are wired in this order: ``data_ingestion_step``,
    ``data_preprocessing_step``, ``outlier_detection_step``,
    ``data_splitter_step`` (with ``target_column="Churn"``),
    ``model_builder_step`` (with ``model_name="xgboost"``) and
    ``model_evaluation_step``.

    Args:
        data_path: Location of the dataset archive handed to the ingestion
            step. Defaults to the path this pipeline previously hard-coded,
            so calling ``training_pipeline()`` with no arguments behaves as
            before.

    Returns:
        The output of ``model_builder_step``.
    """
    # Each step consumes the output of the previous one.
    raw_data = data_ingestion_step(data_path)
    cleaned_data = data_preprocessing_step(raw_data)
    outlier_removed_data = outlier_detection_step(cleaned_data)

    X_train, X_test, y_train, y_test = data_splitter_step(
        outlier_removed_data, target_column="Churn"
    )

    model = model_builder_step(
        model_name="xgboost", X_train=X_train, y_train=y_train
    )

    # The evaluation step still runs as part of the pipeline; its output is
    # not consumed here, so it is no longer bound to an unused local name.
    model_evaluation_step(model, X_test, y_test)

    return model
|
|
|
|
|
# Script entry point: trigger a pipeline run only when this file is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    training_pipeline()
|
|