File size: 2,064 Bytes
92b63f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from zenml import pipeline 
from zenml import Model
from steps.data_ingestion_step import data_ingestion_step
from steps.data_preprocessing_step import data_preprocessing_step
from steps.outlier_detection_step import outlier_detection_step 
from steps.data_splitting_step import data_splitter_step
from steps.model_building_step import model_builder_step
from steps.model_evaluation_step import model_evaluation_step
import logging
import warnings


# Ignore UserWarning-category warnings originating from mlflow modules so
# they do not clutter the pipeline's console output.
warnings.filterwarnings("ignore", category=UserWarning, module="mlflow")


# logging.basicConfig(
#     level=logging.INFO,  # Set the logging level to INFO
#     format='%(asctime)s - %(levelname)s - %(message)s',
#     handlers=[
#         logging.FileHandler("logging.log"),  # Log to a file
#         logging.StreamHandler()  # Also log to console
#     ]
# )
@pipeline(
    model=Model(
        name="customer_churn_prediction",
    )
)
def training_pipeline(
    data_path: str = "/home/sarath_kumar/customer_chrun_prediction/data/customer_churn_dataset-training-master.csv.zip",
):
    """Define the end-to-end training pipeline for customer churn prediction.

    The steps are wired in this order: data ingestion -> preprocessing ->
    outlier detection -> train/test splitting -> model building ->
    model evaluation.

    Args:
        data_path: Location of the raw dataset passed to
            ``data_ingestion_step``. The default is the path this pipeline
            previously hard-coded, so calling ``training_pipeline()`` with
            no arguments behaves exactly as before.

    Returns:
        The output of ``model_builder_step``.
    """
    # Data ingestion: load the raw data from the given location.
    raw_data = data_ingestion_step(data_path)

    # Data preprocessing: hand the raw data to the preprocessing step.
    cleaned_data = data_preprocessing_step(raw_data)

    # Outlier detection: runs on the preprocessed data.
    outlier_removed_data = outlier_detection_step(cleaned_data)

    # Data splitting: produce train/test features and targets, using
    # "Churn" as the target column.
    X_train, X_test, y_train, y_test = data_splitter_step(
        outlier_removed_data, target_column="Churn"
    )

    # Model building: request the "xgboost" model and fit it on the
    # training split.
    model = model_builder_step(
        model_name="xgboost", X_train=X_train, y_train=y_train
    )

    # Model evaluation: runs against the held-out split. Its output is not
    # consumed by the pipeline, so it is deliberately not bound to a name.
    model_evaluation_step(model, X_test, y_test)

    return model


# Invoke the pipeline only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    training_pipeline()