from typing import Callable

import numpy as np
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

from nn.nn import NN


def init_weights_biases(nn: NN):
    # fixed seed so repeated runs start from identical weights
    np.random.seed(0)
    bh = np.zeros((1, nn.hidden_size))
    bo = np.zeros((1, nn.output_size))
    # He initialization: scale by sqrt(2 / fan_in) to keep activation
    # variance roughly constant from layer to layer
    wh = np.random.randn(nn.input_size, nn.hidden_size) * \
        np.sqrt(2 / nn.input_size)
    wo = np.random.randn(nn.hidden_size, nn.output_size) * \
        np.sqrt(2 / nn.hidden_size)
    return wh, wo, bh, bo
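
# Shape sanity check for the initializer (illustrative sizes only, assuming
# a hypothetical network with 4 inputs, 8 hidden units, and 3 outputs):
#   wh: (4, 8)   bh: (1, 8)
#   wo: (8, 3)   bo: (1, 3)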


def train(nn: NN) -> dict:
    wh, wo, bh, bo = init_weights_biases(nn=nn)
    # hold out a test split so the final metrics come from unseen data
    X_train, X_test, y_train, y_test = train_test_split(
        nn.X.to_numpy(),
        nn.y_dummy.to_numpy(),
        test_size=nn.test_size,
        random_state=0,
    )
    accuracy_scores: list[float] = []
    loss_hist: list[float] = []
    for _ in range(nn.epochs):
        # forward pass: hidden layer activations
        hidden_output = compute_node(
            data=X_train,
            weights=wh,
            biases=bh,
            func=nn.func,
        )
        # forward pass: output layer activations
        y_hat = compute_node(
            data=hidden_output,
            weights=wo,
            biases=bo,
            func=nn.func,
        )
        # compute the error, then record this epoch's loss and accuracy
        error = y_hat - y_train
        loss = log_loss(y_true=y_train, y_pred=y_hat)
        accuracy = accuracy_score(y_true=y_train, y_pred=y_hat)
        accuracy_scores.append(accuracy)
        loss_hist.append(loss)
        # backward pass: with a sigmoid output and cross-entropy loss the
        # output delta reduces to (y_hat - y_train), so each weight gradient
        # is the layer's input transposed, dotted with that layer's delta
        dwo = nn.learning_rate * output_weight_prime(hidden_output, error)
        # output_bias_prime already sums the error along axis=0; reshape the
        # result so the update matches the shape of bo
        dbo = nn.learning_rate * output_bias_prime(error).reshape(bo.shape)
        # propagate the delta back through the output weights, scaled by the
        # derivative of the hidden activation
        dhidden = np.dot(error, wo.T) * nn.func_prime(hidden_output)
        dwh = nn.learning_rate * hidden_weight_prime(X_train, dhidden)
        dbh = nn.learning_rate * hidden_bias_prime(dhidden).reshape(bh.shape)
        # gradient descent step on all weights and biases
        wh -= dwh
        wo -= dwo
        bh -= dbh
        bo -= dbo
    # compute final predictions on the held-out test data
    hidden_output_test = compute_node(
        data=X_test,
        weights=wh,
        biases=bh,
        func=nn.func,
    )
    y_hat = compute_node(
        data=hidden_output_test,
        weights=wo,
        biases=bo,
        func=nn.func,
    )
    return {
        "loss_hist": loss_hist,
        "log_loss": log_loss(y_true=y_test, y_pred=y_hat),
        "accuracy": accuracy_score(y_true=y_test, y_pred=y_hat),
        "accuracy_scores": accuracy_scores,
    }


def compute_node(
    data: np.ndarray,
    weights: np.ndarray,
    biases: np.ndarray,
    func: Callable,
) -> np.ndarray:
    # affine transform followed by the activation function
    return func(np.dot(data, weights) + biases)
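

# A minimal sketch of an activation pair compatible with compute_node and
# train: note that train calls nn.func_prime on the *activated* hidden
# output, so the derivative must be written in terms of the activation
# itself. These names are illustrative; the real NN object may supply a
# different activation.
def sigmoid(z: np.ndarray) -> np.ndarray:
    # logistic function, applied elementwise
    return 1 / (1 + np.exp(-z))


def sigmoid_prime(activated: np.ndarray) -> np.ndarray:
    # derivative in terms of the output a = sigmoid(z): da/dz = a * (1 - a)
    return activated * (1 - activated)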


def mean_squared_error(y: np.ndarray, y_hat: np.ndarray) -> float:
    # mean of the squared residuals
    return float(np.mean((y - y_hat) ** 2))


def hidden_bias_prime(error):
    # bias gradient: sum the layer's delta over the batch dimension
    return np.sum(error, axis=0)


def output_bias_prime(error):
    # bias gradient: sum the layer's delta over the batch dimension
    return np.sum(error, axis=0)


def hidden_weight_prime(data, error):
    # weight gradient: layer input transposed, dotted with the delta
    return np.dot(data.T, error)


def output_weight_prime(hidden_output, error):
    # weight gradient: hidden activations transposed, dotted with the delta
    return np.dot(hidden_output.T, error)


def accuracy_score(y_true, y_pred):
    # Ensure y_true and y_pred have the same shape
    if y_true.shape != y_pred.shape:
        raise ValueError("Input shapes do not match.")
    # The network outputs probabilities, so threshold them at 0.5 first;
    # comparing raw floats against 0/1 targets would almost never match
    y_pred = (y_pred >= 0.5).astype(y_true.dtype)
    # a row counts as correct only when every output column matches
    num_samples = len(y_true)
    num_correct = np.sum(np.all(y_true == y_pred, axis=-1))
    return num_correct / num_samples
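

# Hedged usage sketch: this assumes NN is a plain container exposing the
# attributes train() reads (X and y_dummy as pandas objects plus the layer
# sizes and hyperparameters); the real constructor in nn.nn may differ.
# The random data below is purely illustrative.
if __name__ == "__main__":
    import pandas as pd

    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(150, 4)))
    y = pd.Series(rng.integers(0, 3, size=150))
    nn = NN(
        X=X,
        y_dummy=pd.get_dummies(y).astype(float),
        input_size=4,
        hidden_size=8,
        output_size=3,
        epochs=500,
        learning_rate=0.01,
        test_size=0.3,
        func=sigmoid,
        func_prime=sigmoid_prime,
    )
    results = train(nn=nn)
    print(f"test log loss: {results['log_loss']:.4f}")
    print(f"test accuracy: {results['accuracy']:.4f}")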