Commit
·
73d009a
1
Parent(s):
0d09ea1
Adding type hints to the functions in these scripts
Browse files- Data_Plotting/Plot_TSNE.py +20 -13
Data_Plotting/Plot_TSNE.py
CHANGED
|
@@ -2,28 +2,35 @@ from sklearn.manifold import TSNE
|
|
| 2 |
import matplotlib.pyplot as plt
|
| 3 |
import numpy as np
|
| 4 |
|
|
|
|
| 5 |
# Latent Feature Cluster for Training Data using T-SNE
|
| 6 |
-
def TSNE_reduction(latent_points:
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
model = TSNE(n_components=2, random_state=0, perplexity=perplexity,
|
| 9 |
-
learning_rate=learning_rate)
|
| 10 |
-
embedding = model
|
| 11 |
-
# configuring the parameters
|
| 12 |
# the number of components = dimension of the embedded space
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
#
|
| 18 |
-
tsne_data = model.fit_transform(
|
| 19 |
-
latent_points) # When there are more data points, trainX should be the first couple hundred points so TSNE doesn't take too long
|
| 20 |
x = tsne_data[:, 0]
|
| 21 |
y = tsne_data[:, 1]
|
| 22 |
title = ("T-SNE of Data")
|
| 23 |
return x, y, title, embedding
|
| 24 |
|
| 25 |
|
| 26 |
-
def plot_dimensionality_reduction(x, y, label_set, title):
|
| 27 |
plt.title(title)
|
| 28 |
# Color points based on their density
|
| 29 |
if label_set[0].dtype == float:
|
|
|
|
| 2 |
import matplotlib.pyplot as plt
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
+
|
| 6 |
# Latent Feature Cluster for Training Data using T-SNE
|
| 7 |
+
def TSNE_reduction(latent_points: np.ndarray, perplexity=30, learning_rate=20):
|
| 8 |
+
"""
|
| 9 |
+
:param latent_points: [ndarray] - an array of arrays that define the points of an object in the latent space
|
| 10 |
+
:param perplexity: [int] - default perplexity = 30. "Perplexity balances the attention t-SNE gives to local and
|
| 11 |
+
global aspects of the data. It is roughly a guess of the number of close neighbors each point has...
|
| 12 |
+
a denser dataset ... requires higher perplexity value" Recommended: Perplexity(5-50)
|
| 13 |
+
:param learning_rate: [int] - default learning rate = 20 "If the learning rate is too high, the data may look
|
| 14 |
+
like a ‘ball’ with any point approximately equidistant from its nearest neighbours.
|
| 15 |
+
If the learning rate is too low, most points may look compressed in a dense cloud with few outliers."
|
| 16 |
+
Recommended: learning_rate(10-1000)
|
| 17 |
+
:return: [tuple] - the output is the x and y coordinates for the reduced latent space, a title, and the TSNE model object (returned as `embedding`)
|
| 18 |
+
"""
|
| 19 |
model = TSNE(n_components=2, random_state=0, perplexity=perplexity,
|
| 20 |
+
learning_rate=learning_rate)
|
|
|
|
|
|
|
| 21 |
# the number of components = dimension of the embedded space
|
| 22 |
+
|
| 23 |
+
embedding = model
|
| 24 |
+
|
| 25 |
+
tsne_data = model.fit_transform(latent_points)
|
| 26 |
+
# When there are more data points, only use a couple of hundred points so TSNE doesn't take too long
|
|
|
|
|
|
|
| 27 |
x = tsne_data[:, 0]
|
| 28 |
y = tsne_data[:, 1]
|
| 29 |
title = ("T-SNE of Data")
|
| 30 |
return x, y, title, embedding
|
| 31 |
|
| 32 |
|
| 33 |
+
def plot_dimensionality_reduction(x: list, y: list, label_set: list, title: str):
|
| 34 |
plt.title(title)
|
| 35 |
# Color points based on their density
|
| 36 |
if label_set[0].dtype == float:
|