Spaces:
Sleeping
Sleeping
Commit
·
932b3cb
1
Parent(s):
31747ca
starting work with clustering algorithms
Browse files- .vscode/settings.json +6 -0
- README.md +3 -4
- cluster/clusterer.py +28 -0
- cluster/kmeans.py +13 -0
- cluster/main.py +22 -1
- cluster/opts.py +1 -1
- dataset/random.py +0 -14
- example/{main.py → neural_network.py} +0 -0
.vscode/settings.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[python]": {
|
3 |
+
"editor.defaultFormatter": "ms-python.autopep8"
|
4 |
+
},
|
5 |
+
"python.formatting.provider": "none"
|
6 |
+
}
|
README.md
CHANGED
@@ -40,10 +40,9 @@ print(model_data)
|
|
40 |
|
41 |
- Algorithm: <br>
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
* `"heirarchical-clustering"` <br>
|
47 |
|
48 |
- Algorithm Specific Arguments
|
49 |
|
|
|
40 |
|
41 |
- Algorithm: <br>
|
42 |
|
43 |
+
-`"neural-network"` <br>
|
44 |
+
|
45 |
+
- `"kmeans-clustering"` <br> -`"kmedoid-clustering"` <br> -`"heirarchical-clustering"` <br>
|
|
|
46 |
|
47 |
- Algorithm Specific Arguments
|
48 |
|
cluster/clusterer.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Callable
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
|
6 |
+
@dataclass
|
7 |
+
class Clusterer:
|
8 |
+
cluster_func: Callable
|
9 |
+
options: dict
|
10 |
+
|
11 |
+
accuracy: float = 0
|
12 |
+
|
13 |
+
@staticmethod
|
14 |
+
def label():
|
15 |
+
return
|
16 |
+
|
17 |
+
def eval(y_pred, y_true) -> None:
|
18 |
+
return
|
19 |
+
|
20 |
+
@classmethod
|
21 |
+
def from_dict(cls, dct):
|
22 |
+
return cls(**dct)
|
23 |
+
|
24 |
+
def to_dict(self):
|
25 |
+
return {
|
26 |
+
"cluster_method": self.cluster_func.__name__,
|
27 |
+
"options": self.options,
|
28 |
+
}
|
cluster/kmeans.py
CHANGED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def kmeans(
|
5 |
+
X_train: np.array,
|
6 |
+
y_train: np.array,
|
7 |
+
args: dict,
|
8 |
+
):
|
9 |
+
# for this alg, the only arguments
|
10 |
+
# are the number of clusters, k,
|
11 |
+
# and the max iterations
|
12 |
+
|
13 |
+
return
|
cluster/main.py
CHANGED
@@ -1,2 +1,23 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
return
|
|
|
1 |
+
from sklearn.model_selection import train_test_split
|
2 |
+
from typing import Callable
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# for determining which clustering function to call
|
6 |
+
from cluster.opts import clustering_methods
|
7 |
+
|
8 |
+
|
9 |
+
def main(
|
10 |
+
X: np.array,
|
11 |
+
y: np.array,
|
12 |
+
args: dict,
|
13 |
+
):
|
14 |
+
|
15 |
+
cluster_alg: Callable = clustering_methods[args["algorithm"]]
|
16 |
+
X_train, X_test, y_train, y_test = train_test_split(
|
17 |
+
X,
|
18 |
+
y,
|
19 |
+
test_size=0.2,
|
20 |
+
random_state=8675309,
|
21 |
+
)
|
22 |
+
|
23 |
return
|
cluster/opts.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
clustering_methods = {
|
2 |
-
|
3 |
}
|
|
|
1 |
clustering_methods = {
|
2 |
+
"kmeans": "KMeans",
|
3 |
}
|
dataset/random.py
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
|
3 |
-
|
4 |
-
def random_dataset(rows: int, features: int):
|
5 |
-
"""
|
6 |
-
the random_dataset function is used to
|
7 |
-
generate a random normal distribution of
|
8 |
-
data for testing different machine learning
|
9 |
-
algorithms specific to this project
|
10 |
-
"""
|
11 |
-
rng = np.random.default_rng()
|
12 |
-
X = rng.normal(size=(rows, features))
|
13 |
-
y = rng.integers(5, size=(rows, 1))
|
14 |
-
return X, y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
example/{main.py → neural_network.py}
RENAMED
File without changes
|