Jensen-holm committed on
Commit
932b3cb
·
1 Parent(s): 31747ca

starting work with clustering algorithms

Browse files
.vscode/settings.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.defaultFormatter": "ms-python.autopep8"
4
+ },
5
+ "python.formatting.provider": "none"
6
+ }
README.md CHANGED
@@ -40,10 +40,9 @@ print(model_data)
40
 
41
  - Algorithm: <br>
42
 
43
- * `"neural-network"` <br>
44
- * `"kmeans-clustering"` <br>
45
- * `"kmedoid-clustering"` <br>
46
- * `"heirarchical-clustering"` <br>
47
 
48
  - Algorithm Specific Arguments
49
 
 
40
 
41
  - Algorithm: <br>
42
 
43
+ -`"neural-network"` <br>
44
+
45
+ - `"kmeans-clustering"` <br> -`"kmedoid-clustering"` <br> -`"heirarchical-clustering"` <br>
 
46
 
47
  - Algorithm Specific Arguments
48
 
cluster/clusterer.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import Callable
3
+ import numpy as np
4
+
5
+
6
+ @dataclass
7
+ class Clusterer:
8
+ cluster_func: Callable
9
+ options: dict
10
+
11
+ accuracy: float = 0
12
+
13
+ @staticmethod
14
+ def label():
15
+ return
16
+
17
+ def eval(y_pred, y_true) -> None:
18
+ return
19
+
20
+ @classmethod
21
+ def from_dict(cls, dct):
22
+ return cls(**dct)
23
+
24
+ def to_dict(self):
25
+ return {
26
+ "cluster_method": self.cluster_func.__name__,
27
+ "options": self.options,
28
+ }
cluster/kmeans.py CHANGED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
+ def kmeans(
5
+ X_train: np.array,
6
+ y_train: np.array,
7
+ args: dict,
8
+ ):
9
+ # for this algorithm, the only arguments
10
+ # are the number of clusters, k,
11
+ # and the maximum number of iterations
12
+
13
+ return
cluster/main.py CHANGED
@@ -1,2 +1,23 @@
1
- def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  return
 
1
+ from sklearn.model_selection import train_test_split
2
+ from typing import Callable
3
+ import numpy as np
4
+
5
+ # for determining which clustering function to call
6
+ from cluster.opts import clustering_methods
7
+
8
+
9
+ def main(
10
+ X: np.array,
11
+ y: np.array,
12
+ args: dict,
13
+ ):
14
+
15
+ cluster_alg: Callable = clustering_methods[args["algorithm"]]
16
+ X_train, X_test, y_train, y_test = train_test_split(
17
+ X,
18
+ y,
19
+ test_size=0.2,
20
+ random_state=8675309,
21
+ )
22
+
23
  return
cluster/opts.py CHANGED
@@ -1,3 +1,3 @@
1
  clustering_methods = {
2
-
3
  }
 
1
  clustering_methods = {
2
+ "kmeans": "KMeans",
3
  }
dataset/random.py DELETED
@@ -1,14 +0,0 @@
1
- import numpy as np
2
-
3
-
4
- def random_dataset(rows: int, features: int):
5
- """
6
- the random_dataset function is used to
7
- generate a random normal distribution of
8
- data for testing different machine learning
9
- algorithms specific to this project
10
- """
11
- rng = np.random.default_rng()
12
- X = rng.normal(size=(rows, features))
13
- y = rng.integers(5, size=(rows, 1))
14
- return X, y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
example/{main.py → neural_network.py} RENAMED
File without changes