# Streamlit demo: univariate linear regression trained with batch gradient
# descent, implemented in plain Python (no ML library for the model itself).
# src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e
class LinearRegressionGradient:
    """Univariate linear regression fitted with batch gradient descent.

    Model: h(x) = theta[0] + theta[1] * x, where theta is a two-element
    list [intercept, slope] that is updated in place during training.
    """

    def __init__(self, theta=None):
        # theta is kept as a (possibly caller-shared) list so that in-place
        # updates made by gradientDescent are visible to the caller.
        self.theta = theta
        # Most recently computed training loss; inf until training reports one.
        self.loss_ = float("inf")

    def hypothesis(self, x):
        """Predict y for a single input x with the current parameters."""
        return self.theta[0] + self.theta[1] * x

    def loss(self, X, y):
        """Return the cost J(theta) = (1/2m) * sum((h(x_i) - y_i)^2).

        Bug fix: the original compared the raw inputs X[i] to y[i] instead of
        the model's predictions h(X[i]), so the reported loss did not measure
        the fit at all.
        """
        m = len(X)
        return sum((self.hypothesis(X[i]) - y[i]) ** 2 for i in range(m)) / (2 * m)

    def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
        """Fit the model by batch gradient descent, mutating `theta` in place.

        Args:
            X, y: training inputs and targets (equal-length sequences).
            theta: [intercept, slope] list to optimize; bound to self.theta so
                hypothesis() and the updates always use the same parameters
                (the original silently required the caller to pass the same
                list it gave __init__).
            num_iter: number of full-batch update steps.
            alpha: learning rate.

        Prints the loss every 200 iterations and leaves the final loss in
        self.loss_.
        """
        # Ensure predictions and updates act on the same parameter list even
        # if the caller passes a list different from the one given at init.
        self.theta = theta
        m = len(X)
        for j in range(num_iter):
            # Predictions under the current parameters.
            h = list(map(self.hypothesis, X))
            # Partial derivatives of J(theta) w.r.t. theta[0] and theta[1].
            deri_th0 = sum(h[i] - y[i] for i in range(m)) / m
            deri_th1 = sum((h[i] - y[i]) * X[i] for i in range(m)) / m
            # Step against the gradient.
            theta[0] = theta[0] - alpha * deri_th0
            theta[1] = theta[1] - alpha * deri_th1
            # Periodic progress report.
            if j % 200 == 0:
                self.loss_ = self.loss(X, y)
                print(f"loss: {self.loss_}")
        # Refresh loss_ so it reflects the final parameters, not the value
        # from the last multiple-of-200 iteration.
        self.loss_ = self.loss(X, y)
def app():
    """Lay out the page: header, then math/code beside the interactive demo."""
    import streamlit as st

    def _render_header():
        # Page title plus a one-line description.
        st.subheader("Linear Regression using Gradient Descent")
        st.markdown(
            """> Plain Python (vanilla version) i.e. without importing any library"""
        )

    _render_header()
    left_col, right_col = st.columns(2)
    with left_col:
        code_math()
    with right_col:
        interactive_run()
    st.markdown(
        f"> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
    )
def code_math():
    """Left column: show each formula in LaTeX next to the method's source."""
    import inspect
    import streamlit as st

    def show_source(fn):
        # Render a function's own source code as a highlighted python block.
        st.code(inspect.getsource(fn), language="python")

    model = LinearRegressionGradient(theta=[0, 0])

    st.write("The class")
    show_source(model.__init__)

    st.write("the Hypothesis")
    st.latex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
    show_source(model.hypothesis)

    st.write('The Loss/Objective/Cost function "_minimize_"')
    st.latex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
    show_source(model.loss)

    st.write("The Gradient Descent algorithm")
    st.write("> repeat until converge {")
    st.latex(
        r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)} )"""
    )
    st.latex(
        r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)})"""
    )
    st.write("> }")
    show_source(model.gradientDescent)
def interactive_run():
    """Right column: generate toy data, fit the model live, and plot the fit."""
    import streamlit as st
    import numpy as np
    # Short aliases for the Streamlit render calls used below.
    mark = st.markdown
    tex = st.latex
    def random_data(n=10):
        """Return (X, y) as 1-D arrays of n noisy samples on a random line."""
        def sample_linear_regression_dataset(n):
            # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
            import numpy as np
            from sklearn import datasets
            import matplotlib.pyplot as plt  # for plotting
            x, y, coef = datasets.make_regression(
                n_samples=n,  # number of samples
                n_features=1,  # number of features
                n_informative=1,  # number of useful features
                noise=40,  # bias and standard deviation of the guassian noise
                coef=True,  # true coefficient used to generated the data
                random_state=0,
            )  # set for same data points for each run
            # Scale feature x (years of experience) to range 0..20
            # x = np.interp(x, (x.min(), x.max()), (0, 20))
            # Scale target y (salary) to range 20000..150000
            # y = np.interp(y, (y.min(), y.max()), (20000, 150000))
            # NOTE(review): this pyplot state is drawn but never shown via
            # st.pyplot (the call below is commented out) — side effect only.
            plt.ion()  # interactive plot on
            plt.plot(x, y, ".", label="training data")
            plt.xlabel("Years of experience")
            plt.ylabel("Salary $")
            plt.title("Experience Vs. Salary")
            # st.pyplot(plt.show())
            # st.write(type(x.tolist()))
            # st.write(x.tolist())
            # Flatten (n, 1) feature matrix and target into 1-D arrays.
            X, y = x.reshape(x.shape[0],), y.reshape(
                y.shape[0],
            )
            return np.around(X, 2), np.around(y, 2)
            # return [a[0] for a in x.tolist()], [a[0] for a in y.tolist()]
            # return [item for sublist in x.tolist() for item in sublist], [
            #     item for sublist in y for item in sublist
            # ]
        X_, y_ = sample_linear_regression_dataset(n)
        return X_, y_
        # st.write(type(X_), type(y_))
        # st.write(type(np.round(X, 2).tolist()))
        # st.write(X_)  # , y_)
        # return X, y
        # return np.around(X, 2).tolist(), np.around(y, 2).tolist()
    # NOTE(review): on every rerun the data starts from the default n=10; the
    # slider value only takes effect while the button press is being handled.
    X, y = random_data()
    theta = [0, 0]  # initial values
    model = LinearRegressionGradient(theta)
    n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
    if st.button("generate new data"):
        X, y = random_data(n=n)
    mark("_Input_")
    mark(f"_X_ = {X}")
    mark(f"_y_ = {y}")
    model.gradientDescent(X, y, theta)  # run to optimize thetas
    mark("_Solution_")
    tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
    # NOTE(review): loss_ is only refreshed every 200 iterations inside
    # gradientDescent, so this displays a slightly stale value — confirm.
    tex(f"loss = {model.loss_}")
    # -- visualize
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    ax.scatter(X, y, label="Linear Relation")
    # Fitted line evaluated at the training inputs.
    y_pred = theta[0] + theta[1] * np.array(X)
    ax.plot(X, y_pred)
    # NOTE(review): passing the `int` *type* as markevery looks unintentional
    # (markevery expects an int/float/slice value) — verify against matplotlib.
    ax.grid(color="black", linestyle="--", linewidth=0.5, markevery=int)
    ax.legend(loc=2)
    # ax.axis("scaled")
    st.pyplot(fig)
    # st.line_chart(X, y)
|