|
import pandas as pd |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.tree import DecisionTreeClassifier |
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score |
|
from xgboost import XGBClassifier |
|
import logging |
|
import joblib |
|
from config.config import REPORTS_DIR,ARTIFACTS_DIR |
|
|
|
|
|
|
|
# Root-logger configuration for the training run: records at INFO level and
# above are written to the training log file, each line carrying a timestamp,
# the level name and the message.
_log_settings = {
    "filename": '/home/sarath_kumar/customer_chrun_prediction/training_log.log',
    "level": logging.INFO,
    "format": '%(asctime)s - %(levelname)s - %(message)s',
}
logging.basicConfig(**_log_settings)

# First record of the run, so the log shows where each execution begins.
logging.info("Starting training script...")
|
|
|
# Train several churn classifiers on the processed dataset, evaluate each one
# on a held-out split, write the metrics table to REPORTS_DIR and persist every
# fitted model to ARTIFACTS_DIR. Any failure is logged with its traceback and
# then re-raised so the process still exits with an error.
try:
    # Make sure the output folders exist before the (slow) training starts, so
    # a missing directory cannot discard a finished run at save time.
    # NOTE(review): assumes REPORTS_DIR / ARTIFACTS_DIR are pathlib.Path
    # objects (they are combined with "/" further down) -- confirm in config.
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)

    data = pd.read_csv("/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv")
    logging.info("Dataset loaded successfully.")

    # 'Churn' is the target column; every other column is used as a feature.
    X = data.drop('Churn', axis=1)
    y = data['Churn']
    logging.info("Data split into features and target.")

    # One seed for the split and for the stochastic scikit-learn estimators,
    # so metrics and saved artifacts are reproducible from run to run.
    # XGBoost is left at its library defaults.
    seed = 42

    models = {
        "Logistic Regression": LogisticRegression(max_iter=500, solver='saga', random_state=seed),
        "Random Forest": RandomForestClassifier(random_state=seed),
        "Decision Tree": DecisionTreeClassifier(random_state=seed),
        "XGBoost": XGBClassifier(),
    }

    # One dict of scores per model; turned into a DataFrame after the loop.
    metrics_list = []

    # 70 % of the rows are used for training, 30 % are held out for testing.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)
    logging.info("Data split into training and testing sets.")

    for model_name, model in models.items():
        logging.info("Training %s...", model_name)
        model.fit(X_train, y_train)
        logging.info("%s training completed.", model_name)

        y_pred = model.predict(X_test)
        logging.info("%s prediction completed.", model_name)

        # Precision, recall and F1 are support-weighted averages over classes.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        logging.info("%s evaluation metrics calculated.", model_name)

        metrics_list.append({
            "Model": model_name,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1,
        })

    metrics_df = pd.DataFrame(metrics_list)
    logging.info("Metrics DataFrame created.")
    metrics_df.to_csv(REPORTS_DIR / "model_metrics.csv", index=False)
    logging.info("Metrics saved to CSV successfully.")

    # Persist every fitted model; the file name is the display name + ".pkl"
    # (kept unchanged so existing consumers of the artifacts still find them).
    for model_name, model in models.items():
        joblib.dump(model, ARTIFACTS_DIR / f"{model_name}.pkl")
        logging.info("%s saved to file.", model_name)

    logging.info("Training script completed successfully.")

except Exception as e:
    # logging.exception records the message at ERROR level *and* appends the
    # traceback, which a plain logging.error call would drop from the log file.
    logging.exception("An error occurred: %s", e)
    raise
|
|