from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from typing import Callable
from nn.nn import NN
import numpy as np


def init_weights_biases(nn: NN):
    np.random.seed(0)
    bh = np.zeros((1, nn.hidden_size))
    bo = np.zeros((1, nn.output_size))
    # He initialization: scale the random weights by sqrt(2 / fan_in)
    wh = np.random.randn(nn.input_size, nn.hidden_size) * \
        np.sqrt(2 / nn.input_size)
    wo = np.random.randn(nn.hidden_size, nn.output_size) * \
        np.sqrt(2 / nn.hidden_size)
    return wh, wo, bh, bo


def train(nn: NN) -> dict:
    wh, wo, bh, bo = init_weights_biases(nn=nn)
    X_train, X_test, y_train, y_test = train_test_split(
        nn.X.to_numpy(),
        nn.y_dummy.to_numpy(),
        test_size=nn.test_size,
        random_state=0,
    )
    accuracy_scores: list[float] = []
    loss_hist: list[float] = []
    for _ in range(nn.epochs):
        # forward pass: compute hidden-layer output
        hidden_output = compute_node(
            data=X_train,
            weights=wh,
            biases=bh,
            func=nn.func,
        )
        # forward pass: compute output layer
        y_hat = compute_node(
            data=hidden_output,
            weights=wo,
            biases=bo,
            func=nn.func,
        )
        # compute error, loss, and accuracy, and store the history
        error = y_hat - y_train
        loss = log_loss(y_true=y_train, y_pred=y_hat)
        accuracy = accuracy_score(y_true=y_train, y_pred=y_hat)
        accuracy_scores.append(accuracy)
        loss_hist.append(loss)
        # backward pass: compute the gradients of the weights & biases,
        # then update them with gradient descent.
        dwo = nn.learning_rate * output_weight_prime(hidden_output, error)
        # output_bias_prime already sums over the batch (axis=0); the
        # (output_size,) result broadcasts against bo's (1, output_size) shape.
        dbo = nn.learning_rate * output_bias_prime(error)
        dhidden = np.dot(error, wo.T) * nn.func_prime(hidden_output)
        dwh = nn.learning_rate * hidden_weight_prime(X_train, dhidden)
        dbh = nn.learning_rate * hidden_bias_prime(dhidden)
        wh -= dwh
        wo -= dwo
        bh -= dbh
        bo -= dbo
    # compute final predictions on data not seen during training
    hidden_output_test = compute_node(
        data=X_test,
        weights=wh,
        biases=bh,
        func=nn.func,
    )
    y_hat = compute_node(
        data=hidden_output_test,
        weights=wo,
        biases=bo,
        func=nn.func,
    )
    return {
        "loss_hist": loss_hist,
        "log_loss": log_loss(y_true=y_test, y_pred=y_hat),
        "accuracy": accuracy_score(y_true=y_test, y_pred=y_hat),
        "accuracy_scores": accuracy_scores,
    }


def compute_node(data: np.ndarray, weights: np.ndarray, biases: np.ndarray,
                 func: Callable) -> np.ndarray:
    # single dense layer: activation(data @ weights + biases)
    return func(np.dot(data, weights) + biases)


def mean_squared_error(y: np.ndarray, y_hat: np.ndarray) -> float:
    return np.mean((y - y_hat) ** 2)


def hidden_bias_prime(error: np.ndarray) -> np.ndarray:
    # gradient of the loss w.r.t. the hidden biases: sum over the batch
    return np.sum(error, axis=0)


def output_bias_prime(error: np.ndarray) -> np.ndarray:
    # gradient of the loss w.r.t. the output biases: sum over the batch
    return np.sum(error, axis=0)


def hidden_weight_prime(data: np.ndarray, error: np.ndarray) -> np.ndarray:
    # gradient of the loss w.r.t. the hidden weights
    return np.dot(data.T, error)


def output_weight_prime(hidden_output: np.ndarray, error: np.ndarray) -> np.ndarray:
    # gradient of the loss w.r.t. the output weights
    return np.dot(hidden_output.T, error)


def accuracy_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    # Ensure y_true and y_pred have the same shape
    if y_true.shape != y_pred.shape:
        raise ValueError("Input shapes do not match.")
    # y_true is one-hot encoded and y_pred holds class probabilities, so
    # compare the predicted class (argmax per row) with the true class.
    num_samples = y_true.shape[0]
    num_correct = np.sum(
        np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1)
    )
    return num_correct / num_samples
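

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): one forward/backward step with the
# helpers above on random data. The sigmoid activation and the toy layer
# sizes below are assumptions made for this example; real runs go through
# train(nn) with an NN configuration object from nn.nn.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    def sigmoid(x: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-x))

    rng = np.random.default_rng(0)
    X = rng.normal(size=(8, 4))                # 8 samples, 4 features
    y = np.eye(3)[rng.integers(0, 3, size=8)]  # one-hot targets, 3 classes

    wh = rng.normal(size=(4, 5)) * np.sqrt(2 / 4)  # input -> hidden
    wo = rng.normal(size=(5, 3)) * np.sqrt(2 / 5)  # hidden -> output
    bh, bo = np.zeros((1, 5)), np.zeros((1, 3))

    # forward pass through both layers
    hidden = compute_node(data=X, weights=wh, biases=bh, func=sigmoid)
    y_hat = compute_node(data=hidden, weights=wo, biases=bo, func=sigmoid)

    # backward pass for the output layer, mirroring the updates in train()
    error = y_hat - y
    print("output-weight gradient shape:", output_weight_prime(hidden, error).shape)
    print("output-bias gradient shape:", output_bias_prime(error).shape)
    print("accuracy on toy data:", accuracy_score(y_true=y, y_pred=y_hat))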