Spaces:
Runtime error
Runtime error
import logging
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from config.config import REPORTS_DIR,ARTIFACTS_DIR
# Logging setup: all records of level INFO and above are written to a single
# log file, each line laid out as "<timestamp> - <level> - <message>".
# NOTE(review): the log path is absolute and machine-specific; the script
# will only start on a host where this directory exists.
_LOG_PATH = '/home/sarath_kumar/customer_chrun_prediction/training_log.log'
_LOG_LAYOUT = '%(asctime)s - %(levelname)s - %(message)s'

logging.basicConfig(filename=_LOG_PATH, level=logging.INFO, format=_LOG_LAYOUT)

# First record in the log marks the beginning of a training run.
logging.info("Starting training script...")
# Training pipeline: load the processed dataset, fit four classifiers on a
# 70/30 train/test split, write their evaluation metrics to a CSV report and
# persist every fitted model with joblib. Any failure is logged together with
# its traceback and then re-raised so the process still exits with an error.
try:
    # NOTE(review): absolute, machine-specific input path; the script only
    # runs where this file exists.
    data = pd.read_csv("/home/sarath_kumar/customer_chrun_prediction/processed_data/processed_data.csv")
    logging.info("Dataset loaded successfully.")

    # 'Churn' is the target column; every other column is used as a feature.
    X = data.drop('Churn', axis=1)
    y = data['Churn']
    logging.info("Data split into features and target.")

    # Display name -> unfitted estimator. The display name is also used as
    # the file stem when the fitted model is saved below.
    models = {
        "Logistic Regression": LogisticRegression(max_iter=500, solver='saga'),
        "Random Forest": RandomForestClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "XGBoost": XGBClassifier(),
    }
    metrics_list = []

    # Hold out 30% of the rows for evaluation; the seed fixes the split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    logging.info("Data split into training and testing sets.")

    for model_name, model in models.items():
        logging.info("Training %s...", model_name)
        model.fit(X_train, y_train)
        logging.info("%s training completed.", model_name)

        y_pred = model.predict(X_test)
        logging.info("%s prediction completed.", model_name)

        # Precision, recall and F1 use the 'weighted' average across classes.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        logging.info("%s evaluation metrics calculated.", model_name)

        metrics_list.append({
            "Model": model_name,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1,
        })

    # One row per model, in the order the models were trained.
    metrics_df = pd.DataFrame(metrics_list)
    logging.info("Metrics DataFrame created.")

    # Create the report directory up front so a fresh checkout does not fail
    # on the write after all the training work has already been done.
    Path(REPORTS_DIR).mkdir(parents=True, exist_ok=True)
    metrics_df.to_csv(REPORTS_DIR / "model_metrics.csv", index=False)
    logging.info("Metrics saved to CSV successfully.")

    # Same reasoning for the model artifact directory.
    Path(ARTIFACTS_DIR).mkdir(parents=True, exist_ok=True)
    for model_name, model in models.items():
        joblib.dump(model, ARTIFACTS_DIR / f"{model_name}.pkl")
        logging.info("%s saved to file.", model_name)

    logging.info("Training script completed successfully.")
except Exception as e:
    # logging.exception records the message at ERROR level and appends the
    # full traceback, which a plain logging.error call would drop.
    logging.exception("An error occurred: %s", e)
    raise