jsulz HF staff committed on
Commit
6e5511f
·
1 Parent(s): 728801d

initial update to test

Browse files
Files changed (3) hide show
  1. index.html +92 -7
  2. style.css +14 -14
  3. xorbs.json +0 -0
index.html CHANGED
@@ -3,17 +3,102 @@
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
  <link rel="stylesheet" href="style.css" />
 
 
 
 
 
 
 
 
 
8
  </head>
9
  <body>
10
  <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
 
 
 
 
 
 
 
17
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  </body>
19
  </html>
 
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width" />
6
+ <title>xet-repo-dedupe</title>
7
  <link rel="stylesheet" href="style.css" />
8
+ <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
10
+ <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
11
+ <style>
12
+ #vis {
13
+ width: 100%;
14
+ text-align: center;
15
+ }
16
+ </style>
17
  </head>
18
  <body>
19
  <div class="card">
20
+ <h1>Visualizing Repo-level Dedupe</h1>
21
+ <p>This visualization demonstrates the amount of <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/blog/from-files-to-chunks">chunk-level dedupe</a> across all public repos.</p>
22
+ <p>"Dedupe factor" is defined as the number of re-uses of a given "xorb". A "xorb" is a collection of content-defined chunks, typically around 1,000 chunks comprising up to 64 MB of total data.</p>
23
+ <p>Interactions:
24
+ <ul>
25
+ <li>
26
+ Hover to select a xorb, and highlight the same xorb in all other repos in <strong><span style="color: orange">orange</span></strong>.
27
+ </li>
28
+ <li>
29
+ Click to select a row (repo), and fade out all repos that don't contain any overlapping data. Double-click to clear selection.
30
+ </li>
31
+ </ul>
32
+ </p>
33
  </div>
34
+ <div id="vis"></div>
35
+ <script>
36
+ var vlSpec = {
37
+ "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
38
+ "resolve": {"scale": {"x": "independent"}},
39
+ "width": 600,
40
+ "height": 12,
41
+ "params": [
42
+ {
43
+ "name": "highlight",
44
+ "select": {"type": "point", "fields": ["xorb_id"], "on": "pointerover"}
45
+ },
46
+ {
47
+ "name": "select",
48
+ "select": {"type": "point", "fields": ["repo"], "toggle": "false"}
49
+ },
50
+ {
51
+ "name": "xorbs_selected",
52
+ "expr": "pluck(data('source_0'), 'repo_xorb_selected')"
53
+ },
54
+ {"name": "any_xorbs_selected", "expr": "extent(xorbs_selected)[0] != null"}
55
+ ],
56
+ "transform": [
57
+ {
58
+ "calculate": "(select.repo != null ? indexof(select.repo, datum.repo) : -1) + 1",
59
+ "as": "repo_selected"
60
+ },
61
+ {
62
+ "calculate": "if(datum.repo_selected > 0, datum.xorb_id, null)",
63
+ "as": "repo_xorb_selected"
64
+ }
65
+ ],
66
+ "data": {
67
+ "url": "xorbs.json"
68
+ },
69
+ "mark": "rect",
70
+ "encoding": {
71
+ "x": {
72
+ "field": "xorb_id",
73
+ "axis": null,
74
+ "stack": "normalize"
75
+ },
76
+ "color": {
77
+ "condition": [
78
+ {"test": "datum.xorb_id == highlight.xorb_id", "value": "orange"}
79
+ ],
80
+ "field": "dedupe_factor",
81
+ "type": "quantitative",
82
+ "scale": {"domain": [0, 10]}
83
+ },
84
+ "opacity": {
85
+ "condition": [
86
+ {
87
+ "test": "any_xorbs_selected && indexof(xorbs_selected, datum.xorb_id) == -1",
88
+ "value": 0.2
89
+ }
90
+ ]
91
+ },
92
+ "tooltip": {"field": "dedupe_factor"},
93
+ "row": {
94
+ "field": "repo",
95
+ "spacing": 1,
96
+ "header": {"labelAngle": 0, "labelAlign": "left"},
97
+ "sort": {"field": "dedupe_factor", "order": "descending"}
98
+ }
99
+ }
100
+ };
101
+ vegaEmbed('#vis', vlSpec);
102
+ </script>
103
  </body>
104
  </html>
style.css CHANGED
@@ -1,28 +1,28 @@
1
  body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
  }
5
 
6
  h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
  }
10
 
11
  p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
  }
17
 
18
  .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
  }
25
 
26
  .card p:last-child {
27
- margin-bottom: 0;
28
  }
 
1
  body {
2
+ padding: 2rem;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
  }
5
 
6
  h1 {
7
+ font-size: 16px;
8
+ margin-top: 0;
9
  }
10
 
11
  p {
12
+ color: rgb(107, 114, 128);
13
+ font-size: 15px;
14
+ margin-bottom: 10px;
15
+ margin-top: 5px;
16
  }
17
 
18
  .card {
19
+ max-width: 620px;
20
+ margin: 0 auto;
21
+ padding: 16px;
22
+ border: 1px solid lightgray;
23
+ border-radius: 16px;
24
  }
25
 
26
  .card p:last-child {
27
+ margin-bottom: 0;
28
  }
xorbs.json ADDED
The diff for this file is too large to render. See raw diff