Spaces:

Jensen-holm
/

Numpy-Neuron

Sleeping

Jensen-holm committed on May 8, 2023

Commit

3cab2dd

1 Parent(s): 40b389c

working on kmeans

Files changed (9) hide show

app.py CHANGED Viewed

@@ -37,17 +37,14 @@ def index():
     algorithm = options[request.json["algorithm"]]
     args = request.json["arguments"]
-    # in the future instead of a random data set
-    # we should do a more real one like palmer penguins
     X, y = iris()
-    return jsonify(
-        algorithm(
-            X=X,
-            y=y,
-            args=args,
-        )
     )
 if __name__ == "__main__":

     algorithm = options[request.json["algorithm"]]
     args = request.json["arguments"]
+    # using the iris data set for every algorithm
     X, y = iris()
+    result = algorithm(
+        X=X,
+        y=y,
+        args=args,
     )
+    return jsonify(result)
 if __name__ == "__main__":

cluster/clusterer.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from typing import Callable
 import numpy as np
@@ -8,17 +9,15 @@ class Clusterer:
     cluster_func: Callable
     options: dict
-    accuracy: float = 0
-    @staticmethod
-    def label():
-        return
-    def eval(y_pred, y_true) -> None:
-        return
     @classmethod
-    def from_dict(cls, dct):
         return cls(**dct)
     def to_dict(self):

 from dataclasses import dataclass
 from typing import Callable
 import numpy as np
     cluster_func: Callable
     options: dict
+    def eval(
+        self,
+        pred_labels: np.array,
+        true_labels: np.array,
+    ) -> None:
+        ...
     @classmethod
+    def from_dict(cls, dct: dict):
         return cls(**dct)
     def to_dict(self):

cluster/distance.py ADDED Viewed

+import numpy as np
+def euclidean(
+    point: np.ndarray,
+    data: np.ndarray,
+) -> np.ndarray:
+    """
+    Compute the euclidean distance
+    between a point and every row
+    of the dataset
+    point dims: (m,)
+    data dims: (n, m)
+    output dims: (n,)
+    """
+    # sum the squared differences along the feature axis (axis=1)
+    # first, then take the square root of each row's total
+    return np.sqrt(np.sum((point - data) ** 2, axis=1))

cluster/kmeans.py CHANGED Viewed

@@ -1,13 +1,43 @@
 import numpy as np
-def kmeans(
-    X_train: np.array,
-    y_train: np.array,
-    args: dict,
-):
-    # for this alg, the only argument
-    # is the number of clusters, k
-    # and max iterations
-    return

+from dataclasses import dataclass
 import numpy as np
+from cluster.distance import euclidean
+from cluster.clusterer import Clusterer
+@dataclass
+class Kmeans(Clusterer):
+    """K-means clusterer: build() fits k centroids to a data set."""
+    # number of centroids to fit
+    k: int
+    # upper bound on the number of assignment/update passes in build()
+    max_iter: int
+    def build(
+        self,
+        X_train: np.ndarray,
+    ):
+        """
+        Fit self.k centroids to X_train and return them.
+        Each pass assigns every row of X_train to its nearest centroid
+        (euclidean distance) and then moves each centroid to the mean of
+        the rows assigned to it. Stops when the centroids no longer move
+        or after self.max_iter passes.
+        X_train dims: (n, m)
+        output dims: (k, m)
+        """
+        # Randomly select centroid start points, uniformly distributed across the domain of the dataset
+        minimum = np.min(X_train, axis=0)
+        maximum = np.max(X_train, axis=0)
+        centroids = np.array(
+            [np.random.uniform(minimum, maximum) for _ in range(self.k)]
+        )
+        # loop through and cluster data
+        for _ in range(self.max_iter):
+            # sorted_pts[i] collects the rows whose nearest centroid is i
+            sorted_pts = [[] for _ in range(self.k)]
+            for x in X_train:
+                dists = euclidean(x, centroids)
+                sorted_pts[int(np.argmin(dists))].append(x)
+            prev_centroids = centroids
+            # a centroid that attracted no rows keeps its previous position,
+            # which avoids taking the mean of an empty list
+            centroids = np.array([
+                np.mean(pts, axis=0) if pts else prev_centroids[i]
+                for i, pts in enumerate(sorted_pts)
+            ])
+            # converged: this pass did not move any centroid
+            if np.array_equal(centroids, prev_centroids):
+                break
+        return centroids
+    def label(self):
+        # placeholder: labelling is not implemented yet
+        ...
+    def main(self):
+        # NOTE(review): Clusterer.from_dict takes a `dct` argument, but none
+        # is passed here, so this call raises TypeError as written --
+        # confirm which dict is meant to be supplied
+        return self.from_dict()

cluster/kmedoids.py CHANGED Viewed

+from dataclasses import dataclass
+import numpy as np
+from cluster.clusterer import Clusterer
+@dataclass
+class Kmedoids(Clusterer):
+    """K-medoids clusterer; build, label and main are still stubs."""
+    # presumably the number of medoids/clusters -- confirm once build() exists
+    k: int
+    def build(self, X_train: np.array):
+        # placeholder: no clustering logic yet
+        ...
+    def label(self):
+        # takes self so it can be called on an instance, like build()
+        ...
+    def main(self):
+        # takes self, matching the signature of Kmeans.main
+        ...

cluster/main.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from sklearn.model_selection import train_test_split
-from typing import Callable
 import numpy as np
 # for determining which clustering function to call
 from cluster.opts import clustering_methods
@@ -10,14 +10,8 @@ def main(
     X: np.array,
     y: np.array,
     args: dict,
-):
-    cluster_alg: Callable = clustering_methods[args["algorithm"]]
-    X_train, X_test, y_train, y_test = train_test_split(
-        X,
-        y,
-        test_size=0.2,
-        random_state=8675309,
-    )
-    return

 from sklearn.model_selection import train_test_split
 import numpy as np
+from cluster.clusterer import Clusterer
 # for determining which clustering function to call
 from cluster.opts import clustering_methods
     X: np.array,
     y: np.array,
     args: dict,
+) -> dict:
+    cluster_alg: Clusterer = clustering_methods[args["algorithm"]]
+    model = cluster_alg.main(X, args)
+    model.eval(X, y)
+    return model.to_dict()

cluster/opts.py CHANGED Viewed

@@ -1,3 +1,9 @@
-clustering_methods = {
-    "kmeans": "KMeans",
 }

+from cluster.clusterer import Clusterer
+from cluster.kmedoids import Kmedoids
+from cluster.kmeans import Kmeans
+# Maps an algorithm name to the clusterer class that implements it.
+# The values are the classes themselves, not instances, hence type[Clusterer].
+clustering_methods: dict[str, type[Clusterer]] = {
+    "kmeans": Kmeans,
+    "kmedoids": Kmedoids,
 }

example/kmeans.py ADDED Viewed

+import requests
+import json
+# URL of the deployed backend that this example posts to
+ENDPOINT: str = "https://data-mining-from-scratch-backend.onrender.com/"
+# JSON payload: the algorithm to run and the arguments passed to it
+request_params = {
+    "algorithm": "kmeans",
+    "arguments": {
+        "k": 3,
+        "max_iter": 10,
+    },
+}
+headers = {
+    "Content-Type": "application/json",
+}
+def main() -> None:
+    """POST the kmeans request to ENDPOINT and print the decoded JSON reply."""
+    # the request is sent from here rather than at module level, so
+    # importing this file performs no network I/O
+    r = requests.post(
+        ENDPOINT,
+        headers=headers,
+        data=json.dumps(request_params),
+        # without a timeout, requests waits indefinitely on a stalled server
+        timeout=30,
+    )
+    print(r.json())
+if __name__ == "__main__":
+    main()

neural_network/neural_network.py CHANGED Viewed

@@ -25,7 +25,6 @@ class NeuralNetwork:
         return self.compute_node(n1, self.w2, self.b2, self.activation_func)
     def set_loss_hist(self, loss_hist: list) -> None:
-        assert (isinstance(loss_hist, list))
         self.loss_history = loss_hist
     def eval(self, X_test, y_test) -> None:

         return self.compute_node(n1, self.w2, self.b2, self.activation_func)
     def set_loss_hist(self, loss_hist: list) -> None:
         self.loss_history = loss_hist
     def eval(self, X_test, y_test) -> None: