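"""Train and compare customer-churn classification models.

Loads the processed churn dataset, trains Logistic Regression, Random Forest,
Decision Tree, and XGBoost classifiers, evaluates them on a 30% hold-out set,
writes the comparison metrics to REPORTS_DIR, and saves the fitted models to
ARTIFACTS_DIR.
"""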
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
import logging
import joblib
from config.config import REPORTS_DIR, ARTIFACTS_DIR
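# NOTE: config.config is assumed to expose REPORTS_DIR and ARTIFACTS_DIR as
# pathlib.Path objects (they are used with the "/" operator below); adjust
# this if your config module defines them differently.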


# Configure logging
logging.basicConfig(
    filename='/home/sarath_kumar/customer_chrun_prediction/training_log.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

logging.info("Starting training script...")

try:
    # Load the preprocessed dataset
    data = pd.read_csv("/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv")
    logging.info("Dataset loaded successfully.")

    # Separate features and target
    X = data.drop('Churn', axis=1)
    y = data['Churn']
    logging.info("Data split into features and target.")

    # Candidate models to train and compare
    models = {
        "Logistic Regression": LogisticRegression(max_iter=500, solver='saga'),
        "Random Forest": RandomForestClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "XGBoost": XGBClassifier(),
    }

    # Evaluation metrics collected for each model
    metrics_list = []

    # Hold-out split for evaluation (70/30)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    logging.info("Data split into training and testing sets.")

    # Train and evaluate each model
    for model_name, model in models.items():
        logging.info(f"Training {model_name}...")
        model.fit(X_train, y_train)
        logging.info(f"{model_name} training completed.")

        y_pred = model.predict(X_test)
        logging.info(f"{model_name} prediction completed.")

        # Weighted averaging accounts for possible class imbalance in the churn labels
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        logging.info(f"{model_name} evaluation metrics calculated.")

        
        metrics_list.append({
            "Model": model_name,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        })

    metrics_df = pd.DataFrame(metrics_list)
    logging.info("Metrics DataFrame created.")
    metrics_df.to_csv(REPORTS_DIR / "model_metrics.csv", index=False)
    logging.info("Metrics saved to CSV successfully.")

    # Persist each trained model to the artifacts directory
    for model_name, model in models.items():
        joblib.dump(model, ARTIFACTS_DIR / f"{model_name}.pkl")
        logging.info(f"{model_name} saved to file.")

    logging.info("Training script completed successfully.")

except Exception as e:
    logging.error(f"An error occurred: {e}")
    raise