Upload train_model.py
Browse files- train_model.py +74 -0
train_model.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from tensorflow import keras
|
4 |
+
from tensorflow.keras import layers
|
5 |
+
from tensorflow.keras.losses import BinaryCrossentropy
|
6 |
+
from sklearn.model_selection import train_test_split
|
7 |
+
from sklearn.model_selection import RandomizedSearchCV
|
8 |
+
from scikeras.wrappers import KerasClassifier
|
9 |
+
|
10 |
+
|
11 |
+
def create_stats(roster, schedule):
    """Build one flat feature vector per scheduled game.

    For every row of ``schedule`` the stat values of all roster rows that
    belong to the home team are concatenated, followed by those of the
    visiting team. NaN entries are replaced with 0.

    Args:
        roster: DataFrame containing a ``TEAM`` column plus the stat
            columns listed in ``cols`` below.
        schedule: DataFrame with ``Home/Neutral`` and ``Visitor/Neutral``
            columns whose values are matched against ``roster['TEAM']``.

    Returns:
        A list with one 1-D numpy array per game, in schedule order. A team
        with no matching roster rows contributes no values, so that game's
        vector is correspondingly shorter.
    """
    # Relevant columns; TEAM is only used for matching and is dropped from
    # the feature values.
    cols = ['TEAM', 'PTS/G', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV', '3P%', 'FT%', '2P']
    new_roster = roster[cols]

    # Every team appears in many games, so flatten its roster rows once and
    # reuse the result instead of re-filtering the roster for each game.
    team_features = {}

    def features_for(team):
        """Return the flattened stat values of every roster row of *team*."""
        if team not in team_features:
            rows = new_roster[new_roster['TEAM'] == team].values.tolist()
            # row[0] is the TEAM label; keep only the stat values.
            team_features[team] = [value for row in rows for value in row[1:]]
        return team_features[team]

    S = []
    for home, away in zip(schedule['Home/Neutral'], schedule['Visitor/Neutral']):
        # Home team's player stats first, followed by the away team's.
        arr = features_for(home) + features_for(away)
        S.append(np.nan_to_num(np.array(arr), copy=False))
    return S
|
35 |
+
|
36 |
+
# Load the per-player stats and the game schedule (comma-separated text files).
roster = pd.read_csv('player_stats.txt', sep=',')
schedule = pd.read_csv('schedule.txt', sep=',')

# Training label: 0 when the 'PTS' score is strictly greater than 'PTS.1',
# otherwise 1 (ties and non-comparable values fall into class 1).
schedule['winner'] = np.where(schedule['PTS'] > schedule['PTS.1'], 0, 1)

# One feature vector per game and its matching label.
X = np.asarray(create_stats(roster, schedule))
y = schedule['winner'].to_numpy(copy=True)

# Hold out 20% of the games for the final evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
|
45 |
+
|
46 |
+
def create_model(optimizer='rmsprop', init='glorot_uniform', input_dim=100):
    """Build and compile the binary classifier used for the winner prediction.

    Args:
        optimizer: Optimizer name or instance forwarded to ``model.compile``.
        init: Kernel initializer applied to every Dense layer. The default,
            ``'glorot_uniform'``, is also the Dense layer default, so calling
            without arguments builds the same network as before.
        input_dim: Length of the flat per-game feature vector.
            NOTE(review): 100 presumably corresponds to 10 stats x 10 players
            per game - confirm against the roster data.

    Returns:
        A compiled ``keras.Model`` named ``nba_model`` with a single sigmoid
        output unit.
    """
    inputs = keras.Input(shape=(input_dim,))
    # Previously `init` was accepted but ignored, so searching over it had no
    # effect; it is now wired into each layer's kernel initializer.
    x = layers.Dense(50, activation="relu", kernel_initializer=init)(inputs)
    x = layers.Dense(64, activation="relu", kernel_initializer=init)(x)
    outputs = layers.Dense(1, activation='sigmoid', kernel_initializer=init)(x)
    model = keras.Model(inputs=inputs, outputs=outputs, name="nba_model")
    # The sigmoid output already yields probabilities, hence from_logits=False.
    model.compile(
        loss=BinaryCrossentropy(from_logits=False),
        optimizer=optimizer,
        metrics=["accuracy"],
    )

    return model
|
56 |
+
|
57 |
+
# Wrap the Keras builder as a scikit-learn estimator. `init` is declared here
# so it is a recognised parameter that the search below can vary.
model = KerasClassifier(model=create_model, verbose=0, init='glorot_uniform')

# Candidate values for the randomized hyper-parameter search.
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
init = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
epochs = [500, 1000, 1500]
batches = [50, 100, 200]
param_grid = dict(optimizer=optimizer, epochs=epochs, batch_size=batches, init=init)

# Seed the candidate sampling (same seed as the train/test split) so that the
# set of configurations tried is the same on every run.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=100,
    verbose=3,
    random_state=42,
)
random_search_result = random_search.fit(X_train, y_train)
best_model = random_search_result.best_estimator_

# Persist the underlying Keras model of the best estimator.
best_model.model_.save('winner.keras')
best_parameters = random_search_result.best_params_
print("Best parameters: ", best_parameters)

# Evaluate the refit best estimator on the held-out games.
test_accuracy = best_model.score(X_test, y_test)
print("Test accuracy: ", test_accuracy)
|