File size: 4,101 Bytes
10d6c31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f99922
 
 
10d6c31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b82972a
 
 
 
 
2f99922
b82972a
 
 
 
 
 
10d6c31
 
 
 
 
 
 
 
2cecf9a
10d6c31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386bc71
10d6c31
 
 
 
 
 
386bc71
10d6c31
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
from functools import partial

import gradio as gr
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import numpy as np
from sklearn import datasets, manifold


# Seed handed to the dataset generators below as their `random_state`.
SEED = 0
# Number of output dimensions requested from t-SNE (the result is drawn as a
# 2-D scatter plot).
N_COMPONENTS = 2
# Also seed NumPy's global random generator at import time.
np.random.seed(SEED)


def get_circles(n_samples):
    """Generate the noisy concentric-circles toy dataset.

    Args:
        n_samples: Number of points requested from ``make_circles``.

    Returns:
        The ``(points, labels)`` pair produced by ``make_circles``; the
        second array is what the plotting code uses to colour the points.
    """
    circle_options = {
        "n_samples": n_samples,
        "factor": 0.5,
        "noise": 0.05,
        "random_state": SEED,  # fixed seed -> same points on every call
    }
    points, labels = datasets.make_circles(**circle_options)
    return points, labels


def get_s_curve(n_samples):
    """Generate the S-curve toy dataset with its last two columns exchanged.

    Args:
        n_samples: Number of points requested from ``make_s_curve``.

    Returns:
        The ``(points, values)`` pair from ``make_s_curve``, where columns 1
        and 2 of ``points`` have been swapped in place.
    """
    points, values = datasets.make_s_curve(n_samples=n_samples, random_state=SEED)
    # Fancy indexing on the right-hand side yields a copy, so the two columns
    # are exchanged safely even though the assignment happens in place.
    points[:, [1, 2]] = points[:, [2, 1]]
    return points, values


def get_uniform_grid(n_samples):
    """Build a square, evenly spaced grid of 2-D points on the unit square.

    The grid has ``int(sqrt(n_samples))`` points per side, so the number of
    returned points equals ``n_samples`` only when it is a perfect square.

    Args:
        n_samples: Requested number of points.

    Returns:
        A tuple ``(X, color)`` where ``X`` has one ``(x, y)`` row per grid
        point and ``color`` holds the x coordinate of each point.
    """
    side = int(np.sqrt(n_samples))
    ticks = np.linspace(0, 1, side)
    grid_x, grid_y = np.meshgrid(ticks, ticks)

    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    # Two 1-D coordinate vectors side by side -> array of shape (side**2, 2).
    coords = np.column_stack((flat_x, flat_y))
    return coords, flat_x


# Maps the dataset name offered in the UI to the function that generates it.
# Every generator takes the requested number of samples and returns a
# `(X, color)` pair.
DATA_MAPPING = {
    'Circles': get_circles,
    'S-curve': get_s_curve,
    'Uniform Grid': get_uniform_grid,
}



def plot_data(dataset: str, perplexity: int, n_samples: int, tsne: bool):
    """Scatter-plot a toy dataset, optionally after embedding it with t-SNE.

    Args:
        dataset: Key of ``DATA_MAPPING`` selecting the dataset generator.
        perplexity: t-SNE perplexity, given either as a number or as a dict
            carrying the number under the ``'value'`` key. Only used when
            ``tsne`` is true.
        n_samples: Number of points requested from the generator.
        tsne: When true, plot the t-SNE embedding of the data; otherwise plot
            the first two columns of the raw data.

    Returns:
        The matplotlib figure containing the scatter plot.

    Raises:
        KeyError: If ``dataset`` is not a key of ``DATA_MAPPING``.
    """
    if isinstance(perplexity, dict):
        perplexity = perplexity['value']
    perplexity = int(perplexity)

    X, color = DATA_MAPPING[dataset](int(n_samples))
    if tsne:
        # scikit-learn rejects a perplexity that is not strictly smaller than
        # the number of samples. The UI allows perplexity == 100 together
        # with 100 samples, and the grid generator may return fewer points
        # than requested, so clamp instead of letting the callback fail.
        perplexity = min(perplexity, X.shape[0] - 1)
        # NOTE(review): recent scikit-learn releases rename `n_iter` to
        # `max_iter`; confirm against the pinned version before upgrading.
        embedder = manifold.TSNE(
            n_components=N_COMPONENTS,
            init="random",
            random_state=SEED,
            perplexity=perplexity,
            n_iter=400,
        )
        Y = embedder.fit_transform(X)
    else:
        Y = X

    fig, ax = plt.subplots(figsize=(7, 7))

    ax.scatter(Y[:, 0], Y[:, 1], c=color)
    # Hide tick labels: the axis values carry no meaning in this illustration.
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis("tight")

    # Unregister the figure from pyplot so figures do not pile up in memory
    # across callbacks; the Figure object itself stays usable by the caller.
    plt.close(fig)
    return fig


# Page title and the Markdown introduction rendered at the top of the demo.
title = "t-SNE: The effect of various perplexity values on the shape"
description = """
t-distributed Stochastic Neighbor Embedding ([t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html)) is a powerful technique for dimensionality reduction and visualization of high dimensional datasets.

One of the key parameters in t-SNE is perplexity, which controls the number of nearest neighbors used to represent each data point in the low-dimensional space.

In this illustration, we explore the impact of various perplexity values on t-SNE visualizations using three commonly used datasets: Concentric Circles, S-curve and Uniform Grid.

By comparing the resulting visualizations, we demonstrate how changing the perplexity value affects the shape of the visualization.

Created by [@Hnabil](https://huggingface.co/Hnabil) based on [scikit-learn docs](https://scikit-learn.org/stable/auto_examples/manifold/plot_t_sne_perplexity.html)
"""



# Assemble the UI: a header, three controls, and two side-by-side plots (raw
# data on the left, t-SNE embedding on the right), then start the app.
with gr.Blocks(title=title) as demo:
    gr.HTML(f"<b>{title}</b>")
    gr.Markdown(description)

    input_data = gr.Radio(list(DATA_MAPPING), value="Circles", label="dataset")
    n_samples = gr.Slider(
        minimum=100, maximum=1000, value=150, step=25, label='Number of Samples'
    )
    perplexity = gr.Slider(
        minimum=2, maximum=100, value=5, step=1, label='Perplexity'
    )

    # Every callback receives the same three control values, in this order.
    controls = [input_data, perplexity, n_samples]

    with gr.Row():
        with gr.Column():
            original_plot = gr.Plot(label="Original data")
            draw_original = partial(plot_data, tsne=False)
            # The raw data does not depend on perplexity, so that slider is
            # not wired to this plot.
            for register in (input_data.change, n_samples.change, demo.load):
                register(fn=draw_original, inputs=controls, outputs=original_plot)
        with gr.Column():
            tsne_plot = gr.Plot(label="t-SNE")
            draw_tsne = partial(plot_data, tsne=True)
            for register in (
                input_data.change,
                perplexity.change,
                n_samples.change,
                demo.load,
            ):
                register(fn=draw_tsne, inputs=controls, outputs=tsne_plot)


demo.launch()