Spaces:
Sleeping
Sleeping
Commit
·
3cab2dd
1
Parent(s):
40b389c
working on kmeans
Browse files
- app.py +6 -9
- cluster/clusterer.py +8 -9
- cluster/distance.py +16 -0
- cluster/kmeans.py +39 -9
- cluster/kmedoids.py +18 -0
- cluster/main.py +6 -12
- cluster/opts.py +8 -2
- example/kmeans.py +27 -0
- neural_network/neural_network.py +0 -1
app.py
CHANGED
@@ -37,17 +37,14 @@ def index():
|
|
37 |
algorithm = options[request.json["algorithm"]]
|
38 |
args = request.json["arguments"]
|
39 |
|
40 |
-
#
|
41 |
-
# we should do a more real one like palmer penguins
|
42 |
-
|
43 |
X, y = iris()
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
args=args,
|
49 |
-
)
|
50 |
)
|
|
|
51 |
|
52 |
|
53 |
if __name__ == "__main__":
|
|
|
37 |
algorithm = options[request.json["algorithm"]]
|
38 |
args = request.json["arguments"]
|
39 |
|
40 |
+
# using the iris data set for every algorithm
|
|
|
|
|
41 |
X, y = iris()
|
42 |
+
result = algorithm(
|
43 |
+
X=X,
|
44 |
+
y=y,
|
45 |
+
args=args,
|
|
|
|
|
46 |
)
|
47 |
+
return jsonify(result)
|
48 |
|
49 |
|
50 |
if __name__ == "__main__":
|
cluster/clusterer.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from dataclasses import dataclass
|
2 |
from typing import Callable
|
|
|
3 |
import numpy as np
|
4 |
|
5 |
|
@@ -8,17 +9,15 @@ class Clusterer:
|
|
8 |
cluster_func: Callable
|
9 |
options: dict
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
def eval(y_pred, y_true) -> None:
|
18 |
-
return
|
19 |
|
20 |
@classmethod
|
21 |
-
def from_dict(cls, dct):
|
22 |
return cls(**dct)
|
23 |
|
24 |
def to_dict(self):
|
|
|
1 |
from dataclasses import dataclass
|
2 |
from typing import Callable
|
3 |
+
|
4 |
import numpy as np
|
5 |
|
6 |
|
|
|
9 |
cluster_func: Callable
|
10 |
options: dict
|
11 |
|
12 |
+
def eval(
|
13 |
+
self,
|
14 |
+
pred_labels: np.array,
|
15 |
+
true_labels: np.array,
|
16 |
+
) -> None:
|
17 |
+
...
|
|
|
|
|
18 |
|
19 |
@classmethod
|
20 |
+
def from_dict(cls, dct: dict):
|
21 |
return cls(**dct)
|
22 |
|
23 |
def to_dict(self):
|
cluster/distance.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def euclidean(
|
5 |
+
point: np.array,
|
6 |
+
data: np.array,
|
7 |
+
) -> np.array:
|
8 |
+
"""
|
9 |
+
Compute the Euclidean distance
|
10 |
+
between a point and the rest
|
11 |
+
of the dataset
|
12 |
+
point dims: (m,)
|
13 |
+
data dims: (n, m)
|
14 |
+
output dims: (n,)
|
15 |
+
"""
|
16 |
+
return np.sqrt(np.sum((point - data)**2), aixs=1)
|
cluster/kmeans.py
CHANGED
@@ -1,13 +1,43 @@
|
|
|
|
1 |
import numpy as np
|
2 |
|
|
|
|
|
3 |
|
4 |
-
def kmeans(
|
5 |
-
X_train: np.array,
|
6 |
-
y_train: np.array,
|
7 |
-
args: dict,
|
8 |
-
):
|
9 |
-
# for this alg, the only argument
|
10 |
-
# is the number of clusters, k
|
11 |
-
# and max iterations
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
import numpy as np
|
3 |
|
4 |
+
from cluster.distance import euclidean
|
5 |
+
from cluster.clusterer import Clusterer
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
@dataclass
|
9 |
+
class Kmeans(Clusterer):
|
10 |
+
k: int
|
11 |
+
max_iter: int
|
12 |
+
|
13 |
+
def build(
|
14 |
+
self,
|
15 |
+
X_train: np.array,
|
16 |
+
):
|
17 |
+
# Randomly select centroid start points, uniformly distributed across the domain of the dataset
|
18 |
+
minimum = np.min(X_train, axis=0)
|
19 |
+
maximum = np.max(X_train, axis=0)
|
20 |
+
centroids = [np.uniform(minimum, maximum) for _ in range(self.k)]
|
21 |
+
|
22 |
+
# loop through and cluster data
|
23 |
+
prev_centroids = 0
|
24 |
+
iteration = 0
|
25 |
+
while True:
|
26 |
+
sorted_pts = [[] for _ in range(self.k)]
|
27 |
+
for x in X_train:
|
28 |
+
dists = euclidean(x, centroids)
|
29 |
+
|
30 |
+
if not np.not_equal(
|
31 |
+
centroids,
|
32 |
+
prev_centroids,
|
33 |
+
).any():
|
34 |
+
break
|
35 |
+
if not iteration < self.k:
|
36 |
+
break
|
37 |
+
iteration += 1
|
38 |
+
|
39 |
+
def label():
|
40 |
+
...
|
41 |
+
|
42 |
+
def main(self):
|
43 |
+
return self.from_dict()
|
cluster/kmedoids.py
CHANGED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
from cluster.clusterer import Clusterer
|
5 |
+
|
6 |
+
|
7 |
+
@dataclass
|
8 |
+
class Kmedoids(Clusterer):
|
9 |
+
k: int
|
10 |
+
|
11 |
+
def build(self, X_train: np.array):
|
12 |
+
...
|
13 |
+
|
14 |
+
def label():
|
15 |
+
...
|
16 |
+
|
17 |
+
def main():
|
18 |
+
...
|
cluster/main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from sklearn.model_selection import train_test_split
|
2 |
-
from typing import Callable
|
3 |
import numpy as np
|
4 |
|
|
|
5 |
# for determining which clustering function to call
|
6 |
from cluster.opts import clustering_methods
|
7 |
|
@@ -10,14 +10,8 @@ def main(
|
|
10 |
X: np.array,
|
11 |
y: np.array,
|
12 |
args: dict,
|
13 |
-
):
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
y,
|
19 |
-
test_size=0.2,
|
20 |
-
random_state=8675309,
|
21 |
-
)
|
22 |
-
|
23 |
-
return
|
|
|
1 |
from sklearn.model_selection import train_test_split
|
|
|
2 |
import numpy as np
|
3 |
|
4 |
+
from cluster.clusterer import Clusterer
|
5 |
# for determining which clustering function to call
|
6 |
from cluster.opts import clustering_methods
|
7 |
|
|
|
10 |
X: np.array,
|
11 |
y: np.array,
|
12 |
args: dict,
|
13 |
+
) -> dict:
|
14 |
+
cluster_alg: Clusterer = clustering_methods[args["algorithm"]]
|
15 |
+
model = cluster_alg.main(X, args)
|
16 |
+
model.eval(X, y)
|
17 |
+
return model.to_dict()
|
|
|
|
|
|
|
|
|
|
|
|
cluster/opts.py
CHANGED
@@ -1,3 +1,9 @@
|
|
1 |
-
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
}
|
|
|
1 |
+
from cluster.clusterer import Clusterer
|
2 |
+
from cluster.kmedoids import Kmedoids
|
3 |
+
from cluster.kmeans import Kmeans
|
4 |
+
|
5 |
+
|
6 |
+
clustering_methods: dict[str, Clusterer] = {
|
7 |
+
"kmeans": Kmeans,
|
8 |
+
"kmedoids": Kmedoids,
|
9 |
}
|
example/kmeans.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import json
|
3 |
+
|
4 |
+
ENDPOINT: str = "https://data-mining-from-scratch-backend.onrender.com/"
|
5 |
+
|
6 |
+
request_params = {
|
7 |
+
"algorithm": "kmeans",
|
8 |
+
"arguments": {
|
9 |
+
"k": 3,
|
10 |
+
"max_iter": 10,
|
11 |
+
},
|
12 |
+
}
|
13 |
+
|
14 |
+
|
15 |
+
headers = {
|
16 |
+
"Content-Type": "application/json",
|
17 |
+
}
|
18 |
+
|
19 |
+
r = requests.post(
|
20 |
+
ENDPOINT,
|
21 |
+
headers=headers,
|
22 |
+
data=json.dumps(request_params),
|
23 |
+
)
|
24 |
+
|
25 |
+
|
26 |
+
if __name__ == "__main__":
|
27 |
+
print(r.json())
|
neural_network/neural_network.py
CHANGED
@@ -25,7 +25,6 @@ class NeuralNetwork:
|
|
25 |
return self.compute_node(n1, self.w2, self.b2, self.activation_func)
|
26 |
|
27 |
def set_loss_hist(self, loss_hist: list) -> None:
|
28 |
-
assert (isinstance(loss_hist, list))
|
29 |
self.loss_history = loss_hist
|
30 |
|
31 |
def eval(self, X_test, y_test) -> None:
|
|
|
25 |
return self.compute_node(n1, self.w2, self.b2, self.activation_func)
|
26 |
|
27 |
def set_loss_hist(self, loss_hist: list) -> None:
|
|
|
28 |
self.loss_history = loss_hist
|
29 |
|
30 |
def eval(self, X_test, y_test) -> None:
|