Spaces:
Runtime error
Runtime error
from zenml import pipeline | |
from zenml import Model | |
from steps.data_ingestion_step import data_ingestion_step | |
from steps.data_preprocessing_step import data_preprocessing_step | |
from steps.outlier_detection_step import outlier_detection_step | |
from steps.data_splitting_step import data_splitter_step | |
from steps.model_building_step import model_builder_step | |
from steps.model_evaluation_step import model_evaluation_step | |
import logging | |
import warnings | |
# Silence UserWarning messages attributed to modules whose name matches "mlflow".
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="mlflow",
)
# logging.basicConfig( | |
# level=logging.INFO, # Set the logging level to INFO | |
# format='%(asctime)s - %(levelname)s - %(message)s', | |
# handlers=[ | |
# logging.FileHandler("logging.log"), # Log to a file | |
# logging.StreamHandler() # Also log to console | |
# ] | |
# ) | |
def training_pipeline(
    data_path: str = "/home/sarath_kumar/customer_chrun_prediction/data/customer_churn_dataset-training-master.csv.zip",
    target_column: str = "Churn",
    model_name: str = "xgboost",
):
    """Run the end-to-end customer churn training workflow.

    The steps are invoked in a fixed order: data ingestion, preprocessing,
    outlier detection, train/test splitting, model building and model
    evaluation.

    Args:
        data_path: Location of the raw dataset passed to
            ``data_ingestion_step``. Defaults to the path that was
            previously hard-coded in this function.
        target_column: Name of the label column passed to
            ``data_splitter_step``.
        model_name: Identifier of the model passed to
            ``model_builder_step``.

    Returns:
        The value produced by ``model_builder_step``.
    """
    # NOTE(review): ``pipeline`` and ``Model`` are imported from zenml but
    # this function carries no ``@pipeline`` decorator -- confirm whether it
    # was dropped unintentionally.

    # Data ingestion: load the raw data from the given file path.
    raw_data = data_ingestion_step(data_path)

    # Preprocessing followed by outlier handling.
    cleaned_data = data_preprocessing_step(raw_data)
    outlier_removed_data = outlier_detection_step(cleaned_data)

    # Split the dataset into training and testing parts.
    X_train, X_test, y_train, y_test = data_splitter_step(
        outlier_removed_data, target_column=target_column
    )

    # Build the model from the training split.
    model = model_builder_step(
        model_name=model_name, X_train=X_train, y_train=y_train
    )

    # The evaluation result was never used after being computed, so it is
    # no longer bound to a local name; the step itself is still invoked.
    model_evaluation_step(model, X_test, y_test)

    return model
if __name__ == "__main__":
    # Only kick off the training workflow when this file is executed as a
    # script, not when it is imported.
    training_pipeline()