from typing import Callable

import numpy as np
from tqdm import tqdm

from neural_network.opts import activation


def get_args(args: dict, wb: dict):
    """Unpacks hyperparameters and initial weights/biases from their dicts."""
    return (
        args["epochs"],
        args["activation_func"],
        args["func_prime"],
        args["learning_rate"],
        wb["W1"],
        wb["W2"],
        wb["b1"],
        wb["b2"],
    )


def fp(
    X_train: np.ndarray,
    y_train: np.ndarray,
    activation: Callable,
    w1: np.ndarray,
    w2: np.ndarray,
    b1: np.ndarray,
    b2: np.ndarray,
):
    """
    Forward propagation: y_hat = f(f(X @ W1 + b1) @ W2 + b2).
    Returns the prediction, the hidden-layer activations, and the error.
    """
    n1 = compute_node(arr=X_train, w=w1, b=b1, func=activation)
    y_hat = compute_node(arr=n1, w=w2, b=b2, func=activation)
    return y_hat, n1, (y_hat - y_train)


def bp(
    X_train: np.ndarray,
    y_train: np.ndarray,
    wb: dict,
    args: dict,
):
    """
    Trains the network with batch gradient descent, returning per-epoch
    snapshots of the parameters/gradients and the MSE loss history.
    """
    epochs, func, func_prime, lr, w1, w2, b1, b2 = get_args(args, wb)
    r = {}
    loss_history = []
    for e in tqdm(range(epochs)):
        # forward prop
        y_hat, node1, error = fp(
            X_train=X_train,
            y_train=y_train,
            activation=func,
            w1=w1, w2=w2, b1=b1, b2=b2,
        )
        mean_squared_error = mse(y_train, y_hat)
        loss_history.append(mean_squared_error)

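        # Gradient derivation (a sketch; this assumes func_prime is written
        # in terms of the *activated* output, as with sigmoid where
        # s'(z) = s(z) * (1 - s(z))):
        #   L = mean((y_hat - y)^2), so dL/dy_hat is proportional to error
        #   delta2 = error * func_prime(y_hat)            (output-layer delta)
        #   delta1 = (delta2 @ W2.T) * func_prime(node1)  (hidden-layer delta)
        #   dW2 = node1.T @ delta2,  dW1 = X.T @ delta1
        #   db2, db1 = column sums of delta2, delta1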
        # backprop
        delta2 = error * func_prime(y_hat)                 # output-layer delta
        delta1 = np.dot(delta2, w2.T) * func_prime(node1)  # hidden-layer delta
        dw1 = np.dot(X_train.T, delta1)
        dw2 = np.dot(node1.T, delta2)
        db1 = np.sum(delta1, axis=0)
        db2 = np.sum(delta2, axis=0)

        # update weights & biases with gradient descent: we subtract (-=)
        # because stepping against the gradient decreases the loss
        w1 -= (lr * dw1)
        w2 -= (lr * dw2)
        b1 -= (lr * db1)
        b2 -= (lr * db2)

        # snapshot this epoch's values; .copy() matters because the in-place
        # updates above would otherwise mutate every stored snapshot
        r[e] = {
            "W1": w1.copy(),
            "W2": w2.copy(),
            "b1": b1.copy(),
            "b2": b2.copy(),
            "dw1": dw1,
            "dw2": dw2,
            "db1": db1,
            "db2": db2,
            "error": error,
            "mse": mean_squared_error,
        }
    return r, loss_history


def compute_node(arr: np.ndarray, w: np.ndarray, b: np.ndarray, func: Callable):
    """
    Computes a layer's activations during forward prop: func(arr @ w + b).
    """
    return func(np.dot(arr, w) + b)


def mse(y: np.ndarray, y_hat: np.ndarray):
    """Mean squared error between targets and predictions."""
    return np.mean((y - y_hat) ** 2)
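

# Minimal usage sketch (illustrative, not the repo's actual entry point):
# trains the two-layer network on random data. The shapes, hyperparameters,
# and the sigmoid/sigmoid_prime helpers below are assumptions chosen for
# demonstration; sigmoid_prime is written in terms of the activated output,
# which is what the gradient code above expects.
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(a):
        # derivative expressed via the activated output: s'(z) = s(z)(1 - s(z))
        return a * (1 - a)

    X = rng.random((32, 4))   # 32 samples, 4 features (hypothetical)
    y = rng.random((32, 1))   # 32 targets

    wb = {
        "W1": rng.standard_normal((4, 8)) * 0.1,  # input -> hidden
        "W2": rng.standard_normal((8, 1)) * 0.1,  # hidden -> output
        "b1": np.zeros(8),
        "b2": np.zeros(1),
    }
    args = {
        "epochs": 100,
        "activation_func": sigmoid,
        "func_prime": sigmoid_prime,
        "learning_rate": 0.1,
    }

    history, losses = bp(X, y, wb, args)
    print(f"final MSE: {losses[-1]:.6f}")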