davanstrien HF staff committed on
Commit
4edfecf
·
1 Parent(s): 1c9d91a

static version

Browse files
Files changed (2) hide show
  1. README.md +2 -4
  2. index.html +463 -0
README.md CHANGED
@@ -3,11 +3,9 @@ title: Semantic Dataset Search
3
  emoji: 🔎
4
  colorFrom: red
5
  colorTo: pink
6
- sdk: gradio
7
- sdk_version: 4.43.0
8
- app_file: app.py
9
  pinned: false
10
- python_version: 3.11
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
3
  emoji: 🔎
4
  colorFrom: red
5
  colorTo: pink
6
+ sdk: static
7
+ app_file: index.html
 
8
  pinned: false
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Hub Semantic Search</title>
  <!-- Tailwind Play CDN: provides the utility classes used throughout the page -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Lucide: exposes the global `lucide` used to render <i data-lucide="..."> icons -->
  <script src="https://unpkg.com/lucide@latest"></script>
  <!--
    lodash: exposes the global `_` (the page calls _.debounce).
    NOTE(review): the package version in this URL had been mangled into an
    e-mail-obfuscation placeholder; 4.17.21 is assumed. Confirm against the
    original commit.
  -->
  <script src="https://cdn.jsdelivr.net/npm/lodash@4.17.21/lodash.min.js"></script>
</head>
11
+ <body>
12
+ <div class="w-full max-w-4xl mx-auto p-4 space-y-8">
13
+ <h1 class="text-3xl font-bold text-gray-800">Hub Semantic Search</h1>
14
+
15
<!-- Intro card: headline, scope notice, and a collapsible "How it works" list -->
<div
  class="bg-gradient-to-br from-blue-50 to-indigo-50 p-8 rounded-xl shadow-sm border border-blue-100 mb-8"
>
  <h2 class="text-xl font-semibold mb-4 text-gray-800 flex items-center gap-2">
    <i data-lucide="search" class="text-blue-500"></i>
    Welcome to Hub Semantic Search
  </h2>
  <p class="text-gray-700 mb-4">
    Find and explore the 🤗 Hub via semantic search on LLM-generated
    summaries!
  </p>

  <div class="bg-blue-100 text-blue-800 px-4 py-2 rounded-md mb-4">
    <p class="flex items-center gap-2">
      <i data-lucide="info"></i> Currently supporting dataset search only.
      Model search coming soon!
    </p>
  </div>

  <!-- type="button" keeps this an in-page action even if it is ever placed inside a form -->
  <button
    type="button"
    onclick="toggleAccordion()"
    id="accordionButton"
    class="text-blue-500 hover:text-blue-700 flex items-center gap-2 mb-4"
  >
    <i
      data-lucide="chevron-right"
      id="accordionIcon"
      class="transition-transform"
    ></i>
    <span>How it works</span>
  </button>

  <!-- Starts collapsed; toggleAccordion() flips the "hidden" class -->
  <div id="accordionContent" class="hidden">
    <ul class="list-disc list-inside space-y-2 text-gray-600 ml-4">
      <li>
        <strong>AI-Generated Summaries:</strong> Each dataset is indexed
        using a concise, one-sentence summary generated by a large
        language model trained on thousands of Hugging Face dataset cards
      </li>
      <li>
        <strong>Semantic Search:</strong> Enter keywords or descriptions
        to find semantically similar resources based on these AI-generated
        summaries
      </li>
      <li>
        <strong>Find Similar:</strong> Enter a dataset ID (e.g.,
        "airtrain-ai/fineweb-edu-fortified") to discover related resources
        using semantic matching
      </li>
    </ul>
  </div>
</div>
69
+
70
<!-- Tabbed search UI: tab buttons, one panel per tab, shared error banner and results list -->
<div class="tabs w-full">
  <div class="tab-list flex gap-2 border-b mb-6">
    <button
      type="button"
      onclick="switchTab('search')"
      id="searchTab"
      class="tab-trigger active px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors"
    >
      <i data-lucide="search"></i> Search
    </button>
    <button
      type="button"
      onclick="switchTab('similar')"
      id="similarTab"
      class="tab-trigger px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors"
    >
      <i data-lucide="arrow-right"></i> Find Similar
    </button>
  </div>

  <!-- Keyword search panel (visible by default) -->
  <div id="searchContent" class="tab-content space-y-4">
    <div class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100">
      <p class="text-gray-600 mb-4">
        Enter keywords to search through dataset descriptions. The search
        will automatically update as you type.
      </p>
      <div class="relative">
        <!-- aria-label supplies an accessible name; the placeholder alone is not a label -->
        <input
          type="text"
          id="searchInput"
          placeholder="Type to search (minimum 3 characters)..."
          aria-label="Search datasets by keyword"
          class="w-full p-3 border rounded-lg pr-10 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
        />
        <div id="searchLoader" class="hidden absolute right-3 top-2">
          <i data-lucide="loader-2" class="animate-spin"></i>
        </div>
      </div>
    </div>
  </div>

  <!-- "Find similar" panel (hidden until its tab is selected) -->
  <div id="similarContent" class="hidden tab-content space-y-4">
    <div class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100">
      <p class="text-gray-600 mb-4">
        Enter a dataset ID to find similar datasets. You can also click
        "Find Similar" on any search result to quickly find related
        datasets.
      </p>
      <div class="flex gap-3">
        <input
          type="text"
          id="datasetInput"
          placeholder="Enter dataset ID..."
          aria-label="Dataset ID"
          class="w-full p-3 border rounded-lg focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
        />
        <button
          type="button"
          onclick="findSimilarDatasets()"
          class="px-6 py-3 bg-blue-500 hover:bg-blue-600 text-white rounded-lg transition-colors flex items-center gap-2"
        >
          <i data-lucide="search"></i>
          Search
        </button>
        <div id="similarLoader" class="hidden">
          <i data-lucide="loader-2" class="animate-spin"></i>
        </div>
      </div>
    </div>
  </div>

  <!-- role="alert" lets assistive technology announce the message when showError() reveals it -->
  <div
    id="errorMessage"
    role="alert"
    class="hidden mt-4 p-4 text-red-600 bg-red-50 rounded-md"
  ></div>

  <!-- Filled by displayResults() -->
  <div id="resultsContainer" class="mt-6 space-y-4"></div>
</div>
147
+ </div>
148
+
149
<style>
  /* The selected tab button gets a blue label and a blue underline */
  .tab-trigger.active {
    color: #3b82f6;
    border-bottom-color: #3b82f6;
  }
</style>
155
+
156
+ <script>
157
+ // Configuration
158
// Base URL of the search service queried by searchDatasets() and findSimilarDatasets()
const API_URL = "https://davanstrien-huggingface-datasets-search-v2.hf.space";
// Queries shorter than this clear the results instead of hitting the API
const MIN_SEARCH_LENGTH = 3;
// Wait (in milliseconds) handed to _.debounce for the type-ahead search
const DEBOUNCE_MS = 300;
// Each page adds this many results to the requested `k`
const RESULTS_PER_PAGE = 5;
// Upper bound used when deciding whether to offer "Load More Results"
const MAX_RESULTS = 100;
// Page counter shared by switchTab() and loadMore()
let currentPage = 1;

// Render the <i data-lucide="..."> placeholders that exist in the static markup
lucide.createIcons();
168
+
169
+ // Tab switching
170
/**
 * Show one tab panel and mark its tab button as active.
 *
 * Resets the shared page counter, hides every `.tab-content` panel, clears
 * the `active` class from every `.tab-trigger`, then reveals the panel with
 * id `<tabId>Content` and activates the button with id `<tabId>Tab`.
 *
 * @param {string} tabId - Tab prefix; the markup uses "search" and "similar".
 */
function switchTab(tabId) {
  currentPage = 1;

  for (const panel of document.querySelectorAll(".tab-content")) {
    panel.classList.add("hidden");
  }
  for (const trigger of document.querySelectorAll(".tab-trigger")) {
    trigger.classList.remove("active");
  }

  const selectedPanel = document.getElementById(`${tabId}Content`);
  selectedPanel.classList.remove("hidden");

  const selectedTrigger = document.getElementById(`${tabId}Tab`);
  selectedTrigger.classList.add("active");
}
182
+
183
+ // Create result card
184
/**
 * Build the HTML string for one result card.
 *
 * Every value taken from the API response is HTML-escaped before being
 * interpolated, because the caller assigns the returned string to
 * `innerHTML`. The dataset id is handed to the inline click handlers through
 * a `data-dataset-id` attribute rather than being spliced into a JavaScript
 * string literal, so quotes in an id cannot break out of the handler.
 *
 * Side effect: starts checkDatasetValidity() for the dataset. That call is
 * asynchronous; it looks up the card's preview section by id once its network
 * request completes, by which time the caller is expected to have inserted
 * the returned markup.
 *
 * @param {Object} result - One entry of the API `results` array. Reads
 *   `dataset_id`, `likes`, `downloads`, `similarity` and `summary`.
 * @returns {string} Card markup.
 */
function createResultCard(result) {
  const rawId = String(result.dataset_id);
  // Safe for element text and double-quoted attribute values.
  const safeId = _.escape(rawId);
  // Percent-encode each path segment but keep the "/" between owner and name.
  const hubPath = rawId.split("/").map(encodeURIComponent).join("/");
  const matchPercent = (result.similarity * 100).toFixed(1);

  const cardHtml = `
    <div class="card bg-white p-6 rounded-lg shadow hover:shadow-md transition-shadow">
      <div class="flex items-start justify-between">
        <div class="space-y-2 w-full">
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-2">
              <i data-lucide="database" class="text-blue-500"></i>
              <h3 class="text-lg font-semibold">${safeId}</h3>
            </div>
            <div class="flex items-center gap-2">
              <div class="flex items-center gap-4 text-sm text-gray-500 mr-4">
                <span class="flex items-center gap-1">
                  <i data-lucide="heart" class="w-4 h-4"></i>
                  ${_.escape(result.likes)}
                </span>
                <span class="flex items-center gap-1">
                  <i data-lucide="download" class="w-4 h-4"></i>
                  ${_.escape(result.downloads)}
                </span>
              </div>
              <span class="bg-blue-50 px-2 py-1 rounded text-sm">
                ${matchPercent}% match
              </span>
              <button
                type="button"
                data-dataset-id="${safeId}"
                onclick="findSimilarFromResult(this.dataset.datasetId)"
                class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
              >
                <i data-lucide="arrow-right"></i>
                Find Similar
              </button>
            </div>
          </div>
          <p class="text-sm text-gray-600">${_.escape(result.summary)}</p>

          <!-- Add preview section that starts hidden -->
          <div id="preview-section-${safeId}" class="mt-4 border-t pt-4 hidden">
            <button
              type="button"
              data-dataset-id="${safeId}"
              onclick="togglePreview(this.dataset.datasetId)"
              class="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-800"
            >
              <i data-lucide="chevron-right" id="preview-icon-${safeId}" class="transition-transform"></i>
              Preview Dataset
            </button>
            <div id="preview-content-${safeId}" class="hidden mt-4">
              <iframe
                src="https://huggingface.co/datasets/${hubPath}/embed/viewer/default/train"
                title="Dataset viewer for ${safeId}"
                loading="lazy"
                frameborder="0"
                width="100%"
                height="560px"
              ></iframe>
            </div>
          </div>

          <a href="https://huggingface.co/datasets/${hubPath}"
             target="_blank"
             rel="noopener noreferrer"
             class="inline-flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700 mt-2">
            <i data-lucide="external-link" class="w-4 h-4"></i>
            View on Hugging Face Hub
          </a>
        </div>
      </div>
    </div>
  `;

  // Fire-and-forget: reveals the preview section later if the viewer is available.
  checkDatasetValidity(rawId);

  return cardHtml;
}
270
+
271
+ // Add function to check dataset validity
272
/**
 * Ask the datasets server whether a dataset has a viewer and, if so, reveal
 * that dataset's preview section.
 *
 * Failures (network error, non-2xx status, unparsable body) are logged to the
 * console and otherwise ignored, so the card simply stays without a preview.
 *
 * @param {string} datasetId - Dataset id as used in the element id
 *   `preview-section-<datasetId>`.
 * @returns {Promise<void>}
 */
async function checkDatasetValidity(datasetId) {
  try {
    // Encode the id so characters such as "&" or "#" cannot alter the query string.
    const response = await fetch(
      `https://datasets-server.huggingface.co/is-valid?dataset=${encodeURIComponent(
        datasetId
      )}`
    );
    if (!response.ok) {
      throw new Error(`is-valid request returned HTTP ${response.status}`);
    }
    const data = await response.json();

    // Show preview section only if viewer is available
    if (!data.viewer) return;

    const previewSection = document.getElementById(
      `preview-section-${datasetId}`
    );
    // The card may already have been replaced by a newer result list.
    if (previewSection) {
      previewSection.classList.remove("hidden");
    }
  } catch (error) {
    console.error(
      `Failed to check validity for dataset ${datasetId}:`,
      error
    );
  }
}
295
+
296
+ // Search datasets
297
// Sequence number of the most recently started search. A response is only
// rendered if its own number still matches, so a slow reply to an older query
// cannot overwrite the results of a newer one.
let latestSearchRequest = 0;

/**
 * Debounced keyword search against `${API_URL}/search/datasets`.
 *
 * Requests the first `RESULTS_PER_PAGE * page` results and passes them to
 * displayResults(). Queries shorter than MIN_SEARCH_LENGTH just clear the
 * result list. The shared `currentPage` counter is synced to `page`, so a
 * fresh query (default page 1) restarts paging for loadMore().
 *
 * @param {string} query - Text typed into the search box.
 * @param {number} [page=1] - 1-based page count.
 */
const searchDatasets = _.debounce(async (query, page = 1) => {
  currentPage = page;
  const requestId = ++latestSearchRequest;
  const loader = document.getElementById("searchLoader");

  if (query.length < MIN_SEARCH_LENGTH) {
    document.getElementById("resultsContainer").innerHTML = "";
    // Any in-flight request is now stale and will not hide the spinner itself.
    loader.classList.add("hidden");
    return;
  }

  loader.classList.remove("hidden");
  document.getElementById("errorMessage").classList.add("hidden");

  try {
    const response = await fetch(
      `${API_URL}/search/datasets?query=${encodeURIComponent(query)}&k=${
        RESULTS_PER_PAGE * page
      }`
    );
    if (!response.ok) throw new Error("Search failed");

    const data = await response.json();
    if (requestId !== latestSearchRequest) return; // superseded by a newer search
    displayResults(data.results, page);
  } catch (error) {
    if (requestId !== latestSearchRequest) return; // do not report stale failures
    console.error("Search error:", error);
    showError("Failed to perform search. Please try again.");
  } finally {
    // Only the latest request owns the spinner.
    if (requestId === latestSearchRequest) {
      loader.classList.add("hidden");
    }
  }
}, DEBOUNCE_MS);
324
+
325
+ // Find similar datasets
326
/**
 * Look up datasets similar to the id typed into `#datasetInput`.
 *
 * Requests the first `RESULTS_PER_PAGE * page` results from
 * `${API_URL}/similarity/datasets` and passes them to displayResults().
 * Does nothing when the input is empty or whitespace only. The shared
 * `currentPage` counter is synced to `page`, so starting a new lookup from
 * the button or the Enter key (default page 1) restarts paging for loadMore().
 *
 * @param {number} [page=1] - 1-based page count.
 * @returns {Promise<void>}
 */
async function findSimilarDatasets(page = 1) {
  // Trim so a pasted id with stray spaces still matches.
  const datasetId = document.getElementById("datasetInput").value.trim();
  if (!datasetId) return;

  currentPage = page;

  const loader = document.getElementById("similarLoader");
  loader.classList.remove("hidden");
  document.getElementById("errorMessage").classList.add("hidden");

  try {
    const response = await fetch(
      `${API_URL}/similarity/datasets?dataset_id=${encodeURIComponent(
        datasetId
      )}&k=${RESULTS_PER_PAGE * page}`
    );
    if (!response.ok) throw new Error("Similarity search failed");

    const data = await response.json();
    displayResults(data.results, page);
  } catch (error) {
    // Log the cause as the keyword search does, instead of dropping it.
    console.error("Similarity search error:", error);
    showError("Failed to find similar datasets. Please try again.");
  } finally {
    loader.classList.add("hidden");
  }
}
349
+
350
+ // Display results
351
/**
 * Render a result list into `#resultsContainer`.
 *
 * Shows a header with the result count, one card per result, and a footer
 * that is either a "Load More Results" button (when the response filled the
 * requested page and another page still fits under MAX_RESULTS), an
 * end-of-list notice (when MAX_RESULTS results are shown), or nothing.
 * An empty or missing list renders a "No results found" message.
 *
 * @param {Array<Object>} results - Entries accepted by createResultCard().
 * @param {number} [page=1] - 1-based page count that produced `results`.
 */
function displayResults(results, page = 1) {
  const container = document.getElementById("resultsContainer");

  if (!results || results.length === 0) {
    container.innerHTML = `
      <div class="text-center text-gray-500">
        No results found
      </div>
    `;
    return;
  }

  const canLoadMore =
    results.length >= RESULTS_PER_PAGE * page &&
    RESULTS_PER_PAGE * (page + 1) <= MAX_RESULTS;

  let footerHtml = "";
  if (canLoadMore) {
    footerHtml = `<button
        type="button"
        onclick="loadMore()"
        class="w-full mt-4 px-6 py-3 bg-gray-100 hover:bg-gray-200 text-gray-700 rounded-lg transition-colors flex items-center gap-2 justify-center"
      >
        <i data-lucide="more-horizontal"></i>
        Load More Results
      </button>`;
  } else if (results.length >= MAX_RESULTS) {
    footerHtml = `<div class="text-center mt-4 p-6 bg-blue-50 rounded-lg">
        <p class="text-gray-700 mb-3">🎉 You've reached the end of our dataset journey! (${MAX_RESULTS} results)</p>
        <p class="text-gray-600 mb-4">Can't find what you're looking for? Why not create and share your own dataset?</p>
        <a href="https://huggingface.co/docs/datasets/upload_dataset"
           target="_blank"
           rel="noopener noreferrer"
           class="inline-flex items-center gap-2 text-blue-500 hover:text-blue-700">
          <i data-lucide="external-link"></i>
          Learn how to share your dataset on Hugging Face
        </a>
      </div>`;
  }

  container.innerHTML = `
    <div class="flex justify-between items-center">
      <h2 class="text-lg font-semibold">Results</h2>
      <span class="text-sm text-gray-500">Found ${results.length} results</span>
    </div>
    ${results.map((result) => createResultCard(result)).join("")}
    ${footerHtml}
  `;

  // The new markup contains fresh <i data-lucide> placeholders that need rendering.
  lucide.createIcons();
}
396
+
397
+ // Show error message
398
/**
 * Reveal the shared error banner (`#errorMessage`) with the given text.
 *
 * @param {string} message - Text shown to the user; set via `textContent`.
 */
function showError(message) {
  const banner = document.getElementById("errorMessage");
  banner.classList.remove("hidden");
  banner.textContent = message;
}
403
+
404
+ // Event listeners
405
// Wire up the two text inputs. The braces keep the helper constants out of
// the script's top-level scope.
{
  // Every edit of the search box feeds the debounced search.
  const searchBox = document.getElementById("searchInput");
  searchBox.addEventListener("input", (event) => {
    searchDatasets(event.target.value);
  });

  // Pressing Enter in the dataset-id box starts a similarity lookup.
  const datasetBox = document.getElementById("datasetInput");
  datasetBox.addEventListener("keydown", (event) => {
    if (event.key !== "Enter") return;
    findSimilarDatasets();
  });
}
413
+
414
+ // Add new function to handle finding similar datasets from results
415
/**
 * Handler for a result card's "Find Similar" button: switches to the
 * "similar" tab, puts the dataset id into `#datasetInput`, and runs the
 * similarity lookup.
 *
 * @param {string} datasetId - Id of the dataset whose neighbours are wanted.
 */
function findSimilarFromResult(datasetId) {
  switchTab("similar");
  document.getElementById("datasetInput").value = datasetId;
  findSimilarDatasets();
}
426
+
427
+ // Add accordion functionality
428
/**
 * Expand or collapse the "How it works" section and rotate its chevron
 * (0deg when collapsed, 90deg when expanded).
 */
function toggleAccordion() {
  const panel = document.getElementById("accordionContent");
  const chevron = document.getElementById("accordionIcon");

  // classList.toggle returns true when the class is present afterwards.
  const collapsed = panel.classList.toggle("hidden");
  if (collapsed) {
    chevron.style.transform = "rotate(0deg)";
  } else {
    chevron.style.transform = "rotate(90deg)";
  }
}
437
+
438
+ // Add the loadMore function
439
/**
 * Handler for the "Load More Results" button: advances the shared page
 * counter and re-runs whichever query belongs to the currently active tab.
 */
function loadMore() {
  currentPage += 1;
  const activeTabId = document.querySelector(".tab-trigger.active").id;

  if (activeTabId !== "searchTab") {
    findSimilarDatasets(currentPage);
    return;
  }

  const query = document.getElementById("searchInput").value;
  searchDatasets(query, currentPage);
}
450
+
451
+ // Add this new function for toggling the preview
452
/**
 * Show or hide the embedded viewer of one result card and rotate the card's
 * chevron (0deg when hidden, 90deg when shown).
 *
 * @param {string} datasetId - Id used in the element ids
 *   `preview-content-<datasetId>` and `preview-icon-<datasetId>`.
 */
function togglePreview(datasetId) {
  const viewer = document.getElementById(`preview-content-${datasetId}`);
  const chevron = document.getElementById(`preview-icon-${datasetId}`);

  // classList.toggle returns true when the class is present afterwards.
  const nowHidden = viewer.classList.toggle("hidden");
  if (nowHidden) {
    chevron.style.transform = "rotate(0deg)";
  } else {
    chevron.style.transform = "rotate(90deg)";
  }
}
461
+ </script>
462
+ </body>
463
+ </html>