carlosgonzalezmartinez committed on
Commit
f85aa5b
·
verified ·
1 Parent(s): 3129fa5

Delete train.py

Browse files
Files changed (1) hide show
  1. train.py +0 -95
train.py DELETED
@@ -1,95 +0,0 @@
"""Train a logistic-regression classifier for forest health status.

Reads the forest-health CSV, standardizes the numeric features, tunes the
regularization strength ``C`` with randomized search over 3-fold CV,
evaluates the best pipeline on a held-out test split, and serializes the
fitted estimator to ``model.joblib``.
"""

import joblib
import pandas as pd

from sklearn.compose import make_column_transformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from warnings import filterwarnings
filterwarnings('ignore')

# NOTE(review): hard-coded Colab path — assumes the CSV has been uploaded to /content.
df = pd.read_csv("/content/forest_health_data_with_target.csv")

target = 'Health_Status'
numeric_features = [
    'Latitude',
    'Longitude',
    'DBH',
    'Tree_Height',
    'Crown_Width_North_South',
    'Crown_Width_East_West',
    'Slope',
    'Elevation',
    'Temperature',
    'Humidity',
    'Soil_TN',
    'Soil_TP',
    'Soil_AP',
    'Soil_AN',
    'Menhinick_Index',
    'Gleason_Index',
    'Fire_Risk_Index',
]

print("Creating data subsets")

X = df[numeric_features]
y = df[target]

# stratify=y keeps the class proportions consistent between train and test
# splits, which matters for a classification target.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardize all numeric features; embedding the scaler in the pipeline
# guarantees the same transform is applied at predict time.
preprocessor = make_column_transformer(
    (StandardScaler(), numeric_features),
)

model_logistic_regression = LogisticRegression(n_jobs=-1)

print("Estimating Best Model Pipeline")

model_pipeline = make_pipeline(
    preprocessor,
    model_logistic_regression,
)

# Candidate inverse-regularization strengths for logistic regression.
param_distribution = {
    "logisticregression__C": [0.001, 0.01, 0.1, 0.5, 1]
}

rand_search_cv = RandomizedSearchCV(
    model_pipeline,
    param_distribution,
    n_iter=3,
    cv=3,
    random_state=42,
)

rand_search_cv.fit(Xtrain, ytrain)

print("Logging Metrics")
# BUG FIX: best_score_ is the mean cross-validation accuracy, not test
# accuracy — label it accordingly, and evaluate on the held-out test split,
# which was previously created but never used.
print(f"CV Accuracy: {rand_search_cv.best_score_}")
ypred = rand_search_cv.predict(Xtest)
print(f"Test Accuracy: {accuracy_score(ytest, ypred)}")
print(classification_report(ytest, ypred))

print("Serializing Model")

saved_model_path = "model.joblib"

joblib.dump(rand_search_cv.best_estimator_, saved_model_path)