davanstrien's picture
davanstrien HF Staff
add share query button
b6c1ca0
raw
history blame
26.6 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Hub Semantic Search</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://unpkg.com/lucide@latest"></script>
  <!--
    Lodash supplies _.debounce for the search box. The version segment of
    this URL had been replaced by an e-mail-obfuscation placeholder, which
    made the script fail to load. 4.17.21 is assumed; TODO confirm against
    the deployed page.
  -->
  <script src="https://cdn.jsdelivr.net/npm/lodash@4.17.21/lodash.min.js"></script>
</head>
<body>
<div class="w-full max-w-4xl mx-auto p-4 space-y-8">
<h1 class="text-3xl font-bold text-gray-800">Hub Semantic Search</h1>
<!-- Intro panel: welcome copy, current-scope notice and a collapsible "How it works" list. -->
<div
  class="bg-gradient-to-br from-blue-50 to-indigo-50 p-6 rounded-xl shadow-sm border border-blue-100 mb-6"
>
  <h2 class="text-lg font-semibold mb-2 text-gray-800 flex items-center gap-2">
    <i data-lucide="search" class="text-blue-500"></i>
    Welcome to Hub Semantic Search
  </h2>
  <p class="text-gray-700 mb-2 text-sm">
    Find and explore the 🤗 Hub using semantic search on LLM-generated
    summaries!
  </p>
  <div class="bg-blue-100 text-blue-800 px-3 py-1.5 rounded-md mb-2 text-sm">
    <p class="flex items-center gap-2">
      <i data-lucide="info"></i> Currently supporting dataset search only.
      Model search coming soon!
    </p>
  </div>
  <!-- toggleAccordion() shows/hides #accordionContent and rotates #accordionIcon. -->
  <button
    type="button"
    onclick="toggleAccordion()"
    id="accordionButton"
    class="text-blue-500 hover:text-blue-700 flex items-center gap-2 text-sm"
    aria-controls="accordionContent"
  >
    <i
      data-lucide="chevron-right"
      id="accordionIcon"
      class="transition-transform"
    ></i>
    <span>How it works</span>
  </button>
  <div id="accordionContent" class="hidden">
    <ul class="list-disc list-inside space-y-1 text-gray-600 ml-4 mt-2 text-sm">
      <li>
        <strong>AI-Generated Summaries:</strong> Each dataset is indexed
        using a concise summary generated by an LLM
      </li>
      <li>
        <strong>Semantic Search:</strong> Find semantically similar
        resources based on these summaries
      </li>
      <li>
        <strong>Find Similar:</strong> Discover related resources using
        semantic matching
      </li>
    </ul>
  </div>
</div>
<!--
  Tabbed search UI. Element ids are looked up by the inline script
  (searchTab/similarTab, searchContent/similarContent, searchInput,
  datasetInput, suggestionsBox, errorMessage, resultsContainer), so they
  must stay as they are.
-->
<div class="tabs w-full">
  <div class="tab-list flex gap-2 border-b mb-6">
    <button
      type="button"
      onclick="switchTab('search')"
      id="searchTab"
      class="tab-trigger active px-4 sm:px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors flex-1 justify-center"
    >
      <i data-lucide="search"></i> Search
    </button>
    <button
      type="button"
      onclick="switchTab('similar')"
      id="similarTab"
      class="tab-trigger px-4 sm:px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors flex-1 justify-center"
    >
      <i data-lucide="arrow-right"></i> Find Similar
    </button>
  </div>
  <div id="searchContent" class="tab-content space-y-4">
    <div class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100">
      <p class="text-gray-600 mb-4">
        Enter keywords to search through dataset descriptions. The search
        will automatically update as you type.
      </p>
      <div class="relative">
        <!-- The placeholder is not an accessible name, hence the aria-label. -->
        <input
          type="text"
          id="searchInput"
          placeholder="Type to search (minimum 3 characters)..."
          class="w-full p-3 border rounded-lg pr-10 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
          aria-label="Search datasets"
        />
        <div id="searchLoader" class="hidden absolute right-3 top-2">
          <i data-lucide="loader-2" class="animate-spin"></i>
        </div>
      </div>
    </div>
  </div>
  <div id="similarContent" class="hidden tab-content space-y-4">
    <div class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100">
      <p class="text-gray-600 mb-4">
        Enter a dataset ID to find similar datasets. Popular datasets will
        appear as you type.
      </p>
      <div class="flex gap-3">
        <div class="relative w-full">
          <!-- autocomplete is off so browser autofill does not cover the custom suggestion list. -->
          <input
            type="text"
            id="datasetInput"
            class="w-full p-3 border border-gray-200 rounded-lg"
            placeholder="e.g. openai/gsm8k"
            autocomplete="off"
            aria-label="Dataset ID"
          />
          <div
            id="suggestionsBox"
            class="hidden absolute w-full mt-1 bg-white border border-gray-200 rounded-lg shadow-lg z-10 max-h-60 overflow-y-auto"
          ></div>
        </div>
        <!--
          "btn-primary" has no definition on this page, so the button was
          rendered unstyled; the Tailwind utilities below give it a visible
          primary style. The original class is kept as a hook.
        -->
        <button
          type="button"
          onclick="findSimilarDatasets()"
          class="btn-primary px-6 py-3 bg-blue-500 hover:bg-blue-600 text-white rounded-lg transition-colors whitespace-nowrap"
        >
          Find Similar
        </button>
      </div>
    </div>
  </div>
  <!-- role="alert" lets assistive technology announce messages written here by showError(). -->
  <div
    id="errorMessage"
    class="hidden mt-4 p-4 text-red-600 bg-red-50 rounded-md"
    role="alert"
  ></div>
  <div id="resultsContainer" class="mt-6 space-y-4"></div>
</div>
</div>
<style>
  /* Selected tab: blue label with a matching blue underline. */
  .tab-trigger.active {
    color: #3b82f6;
    border-bottom-color: #3b82f6;
  }
</style>
<script>
// Configuration
// Base URL of the search backend; the /search/datasets and
// /similarity/datasets requests below are built from it.
const API_URL =
"https://davanstrien-huggingface-datasets-search-v2.hf.space";
// Queries shorter than this clear the results instead of hitting the API.
const MIN_SEARCH_LENGTH = 3;
// Wait passed to _.debounce for the search-as-you-type handler.
const DEBOUNCE_MS = 300;
// Each page adds this many results; requests ask for RESULTS_PER_PAGE * page.
const RESULTS_PER_PAGE = 5;
// "Load More" stops being offered once the next page would exceed this total.
const MAX_RESULTS = 100;
// Page counter: reset to 1 by switchTab(), incremented by loadMore().
let currentPage = 1;
// Query-string parameters captured once at load time; they are applied in
// the DOMContentLoaded handler ("q" = search text, "similar" = dataset ID).
const URL_PARAMS = new URLSearchParams(window.location.search);
const INITIAL_SEARCH = URL_PARAMS.get("q");
const INITIAL_SIMILAR = URL_PARAMS.get("similar");
// Initialize Lucide icons
lucide.createIcons();
// Activate the tab named by tabId ("search" or "similar"): hide every tab
// panel, clear the active marker from every tab button, then reveal the
// matching `${tabId}Content` panel and mark `${tabId}Tab` as active.
// Also resets paging and removes the other tab's parameter from the URL.
function switchTab(tabId) {
  currentPage = 1;

  for (const panel of document.querySelectorAll(".tab-content")) {
    panel.classList.add("hidden");
  }
  for (const tabButton of document.querySelectorAll(".tab-trigger")) {
    tabButton.classList.remove("active");
  }

  document.getElementById(tabId + "Content").classList.remove("hidden");
  document.getElementById(tabId + "Tab").classList.add("active");

  // Each tab owns one query parameter; drop the one belonging to the other tab.
  switch (tabId) {
    case "search":
      updateURL({ similar: null });
      break;
    case "similar":
      updateURL({ q: null });
      break;
  }
}
// Build the HTML string for one result card and start the asynchronous
// check that decides whether the card's dataset preview may be shown.
//
// result: object read for dataset_id, likes, downloads, similarity and
//         summary (all interpolated into the card).
// Returns the card markup as a string; the caller inserts it into the DOM.
//
// Every value from `result` is escaped before it reaches the markup, and the
// per-card share button calls shareResults() (the previous handler name,
// shareResult, is not defined anywhere on this page).
function createResultCard(result) {
  // Escape text for element content and double-quoted attribute values.
  const escapeHtml = (value) =>
    String(value).replace(
      /[&<>"']/g,
      (ch) =>
        ({ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" }[
          ch
        ])
    );

  const datasetId = String(result.dataset_id);
  // For text nodes and id="..." attributes (the browser decodes entities, so
  // getElementById with the raw ID still matches).
  const idHtml = escapeHtml(datasetId);
  // JSON.stringify produces a valid JS string literal; escaping it afterwards
  // makes it safe inside an onclick="..." attribute.
  const idJsArg = escapeHtml(JSON.stringify(datasetId));
  // URL path form: encode each segment but keep the owner/name slash.
  const idPath = escapeHtml(
    datasetId.split("/").map(encodeURIComponent).join("/")
  );
  const matchPercent = (result.similarity * 100).toFixed(1);

  const cardHtml = `
    <div class="card bg-white p-4 sm:p-6 rounded-lg shadow hover:shadow-md transition-shadow">
      <div class="space-y-2 w-full">
        <div class="flex flex-col sm:flex-row sm:items-center justify-between gap-2">
          <div class="flex items-center gap-2">
            <i data-lucide="database" class="text-blue-500"></i>
            <h3 class="text-lg font-semibold">${idHtml}</h3>
          </div>
          <div class="flex flex-wrap items-center gap-2">
            <div class="flex items-center gap-4 text-sm text-gray-500">
              <span class="flex items-center gap-1">
                <i data-lucide="heart" class="w-4 h-4"></i>
                ${escapeHtml(result.likes)}
              </span>
              <span class="flex items-center gap-1">
                <i data-lucide="download" class="w-4 h-4"></i>
                ${escapeHtml(result.downloads)}
              </span>
            </div>
            <span class="bg-blue-50 px-2 py-1 rounded text-sm">
              ${matchPercent}% match
            </span>
            <button
              type="button"
              onclick="shareResults()"
              class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
              title="Share link to these search results"
            >
              <i data-lucide="share-2"></i>
              Share Results
            </button>
            <button
              type="button"
              onclick="findSimilarFromResult(${idJsArg})"
              class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
            >
              <i data-lucide="arrow-right"></i>
              Find Similar
            </button>
          </div>
        </div>
        <p class="text-sm text-gray-600">${escapeHtml(result.summary)}</p>
        <!-- Add preview section that starts hidden -->
        <div id="preview-section-${idHtml}" class="mt-4 border-t pt-4 hidden">
          <button
            type="button"
            onclick="togglePreview(${idJsArg})"
            class="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-800"
          >
            <i data-lucide="chevron-right" id="preview-icon-${idHtml}" class="transition-transform"></i>
            Preview Dataset
          </button>
          <div id="preview-content-${idHtml}" class="hidden mt-4">
            <iframe
              src="https://huggingface.co/datasets/${idPath}/embed/viewer/default/train"
              title="Dataset viewer for ${idHtml}"
              loading="lazy"
              frameborder="0"
              width="100%"
              height="560px"
            ></iframe>
          </div>
        </div>
        <a href="https://huggingface.co/datasets/${idPath}"
          target="_blank"
          rel="noopener noreferrer"
          class="inline-flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700 mt-2">
          <i data-lucide="external-link" class="w-4 h-4"></i>
          View on Hugging Face Hub
        </a>
      </div>
    </div>
  `;

  // Fire-and-forget: the check looks up this card's preview section only
  // after its network request completes, so the caller is expected to have
  // inserted the returned markup into the document by then.
  checkDatasetValidity(datasetId);
  return cardHtml;
}
// Ask the datasets-server "is-valid" endpoint about datasetId and, when the
// response reports `viewer` as truthy, un-hide the element with id
// `preview-section-${datasetId}` if it exists in the document.
//
// Best-effort: any failure (network error, non-2xx status, unparsable body)
// is logged and the preview section simply stays hidden. Never rejects.
async function checkDatasetValidity(datasetId) {
  try {
    // The ID is a query-string value, so it must be percent-encoded.
    const response = await fetch(
      `https://datasets-server.huggingface.co/is-valid?dataset=${encodeURIComponent(
        datasetId
      )}`
    );
    if (!response.ok) {
      throw new Error(`is-valid request failed with status ${response.status}`);
    }
    const data = await response.json();
    // Show preview section only if viewer is available
    if (data && data.viewer) {
      const previewSection = document.getElementById(
        `preview-section-${datasetId}`
      );
      if (previewSection) {
        previewSection.classList.remove("hidden");
      }
    }
  } catch (error) {
    console.error(
      `Failed to check validity for dataset ${datasetId}:`,
      error
    );
  }
}
// Apply `params` to the page URL's query string and record the result in
// browser history.
//
// params: object mapping parameter names to values. A truthy value sets the
//         parameter; a falsy value (null, undefined, "") removes it.
//
// No history entry is pushed when the resulting query string is the same as
// the current one; otherwise every tab click or "load more" would add an
// identical entry the user has to step back through.
function updateURL(params) {
  const currentURL = new URL(window.location);
  const newURL = new URL(currentURL);
  Object.entries(params).forEach(([key, value]) => {
    if (value) {
      newURL.searchParams.set(key, value);
    } else {
      newURL.searchParams.delete(key);
    }
  });
  // Compare serialized parameters (not raw hrefs) so that a mere
  // re-serialization of an unchanged query does not count as a change.
  if (newURL.searchParams.toString() === currentURL.searchParams.toString()) {
    return;
  }
  window.history.pushState({}, "", newURL);
}
// Sequence number of the most recent search invocation. A response whose
// number is no longer the latest belongs to an outdated query and is ignored.
let latestSearchRequest = 0;

// Debounced search-as-you-type handler.
//
// query: text from the search box.
// page:  1-based page number; the request asks for RESULTS_PER_PAGE * page
//        results.
//
// Queries shorter than MIN_SEARCH_LENGTH clear the results and the URL
// parameters. Otherwise the query is written to the URL, the API is called
// and the results are rendered; failures are logged and shown via showError().
const searchDatasets = _.debounce(async (query, page = 1) => {
  const requestId = ++latestSearchRequest;
  // Keep the shared page counter in step with what is displayed, so that
  // "load more" after a fresh query continues from page 2 rather than from
  // wherever the previous query stopped.
  currentPage = page;

  if (query.length < MIN_SEARCH_LENGTH) {
    document.getElementById("resultsContainer").innerHTML = "";
    // A superseded request no longer hides the spinner itself, so do it here.
    document.getElementById("searchLoader").classList.add("hidden");
    updateURL({ q: null, similar: null }); // Clear URL params
    return;
  }

  document.getElementById("searchLoader").classList.remove("hidden");
  document.getElementById("errorMessage").classList.add("hidden");
  // Update URL with search query
  updateURL({ q: query, similar: null });

  try {
    const response = await fetch(
      `${API_URL}/search/datasets?query=${encodeURIComponent(query)}&k=${
        RESULTS_PER_PAGE * page
      }`
    );
    if (!response.ok) throw new Error("Search failed");
    const data = await response.json();
    // A newer search started while this one was in flight: drop the result.
    if (requestId !== latestSearchRequest) return;
    displayResults(data.results, page);
  } catch (error) {
    if (requestId !== latestSearchRequest) return;
    console.error("Search error:", error);
    showError("Failed to perform search. Please try again.");
  } finally {
    // Only the latest request controls the spinner.
    if (requestId === latestSearchRequest) {
      document.getElementById("searchLoader").classList.add("hidden");
    }
  }
}, DEBOUNCE_MS);
// Cache for trending datasets
let trendingDatasetsCache = null;
let cacheTimestamp = null;
const CACHE_DURATION = 1000 * 60 * 15; // 15 minutes
// Promise of the request currently in flight, if any. Lets overlapping
// callers (for example a focus event followed at once by an input event)
// share one network call.
let trendingDatasetsRequest = null;

// Return up to 20 dataset IDs from the Hub's dataset listing, for use as
// suggestions.
//
// A successful result is cached for CACHE_DURATION. On any failure the error
// is logged and an empty array is returned; nothing is cached, so the next
// call retries. Never rejects.
async function fetchTrendingDatasets() {
  if (
    trendingDatasetsCache &&
    cacheTimestamp &&
    Date.now() - cacheTimestamp < CACHE_DURATION
  ) {
    return trendingDatasetsCache;
  }
  if (trendingDatasetsRequest) {
    return trendingDatasetsRequest;
  }

  trendingDatasetsRequest = (async () => {
    try {
      // Only the first 20 entries are used, so ask for no more than that.
      const response = await fetch(
        "https://huggingface.co/api/datasets?limit=20"
      );
      if (!response.ok) {
        throw new Error(
          `Dataset listing request failed with status ${response.status}`
        );
      }
      const data = await response.json();
      if (!Array.isArray(data)) {
        throw new Error("Dataset listing response is not an array");
      }
      // Take the first 20 IDs in the order the API returned them (the
      // original author's note says the listing arrives already sorted;
      // that cannot be verified from this file).
      const trendingDatasets = data
        .slice(0, 20)
        .map((dataset) => dataset.id);
      trendingDatasetsCache = trendingDatasets;
      cacheTimestamp = Date.now();
      return trendingDatasets;
    } catch (error) {
      console.error("Error fetching trending datasets:", error);
      return [];
    } finally {
      trendingDatasetsRequest = null;
    }
  })();

  return trendingDatasetsRequest;
}
// Render the suggestion dropdown.
//
// datasets:       array of dataset ID strings; empty or missing hides the box.
// suggestionsBox: the dropdown element to fill and show/hide.
//
// Each entry becomes a clickable row that calls selectSuggestion() with its
// dataset ID. IDs are escaped before being placed in the markup and in the
// inline handler, so an unusual ID cannot break out of either context.
function displaySuggestions(datasets, suggestionsBox) {
  if (!datasets || datasets.length === 0) {
    suggestionsBox.classList.add("hidden");
    return;
  }

  // Escape text for element content and double-quoted attribute values.
  const escapeHtml = (value) =>
    String(value).replace(
      /[&<>"']/g,
      (ch) =>
        ({ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" }[
          ch
        ])
    );

  suggestionsBox.innerHTML = datasets
    .map((datasetId) => {
      const label = escapeHtml(datasetId);
      // JSON.stringify gives a valid JS string literal; escaping it makes it
      // safe inside the onclick="..." attribute.
      const jsArg = escapeHtml(JSON.stringify(String(datasetId)));
      return `
        <div
          class="p-3 hover:bg-gray-50 cursor-pointer border-b last:border-b-0"
          onclick="selectSuggestion(${jsArg})"
        >
          <div class="flex items-center gap-2">
            <i data-lucide="database" class="w-4 h-4 text-blue-500"></i>
            <span>${label}</span>
          </div>
        </div>
      `;
    })
    .join("");
  suggestionsBox.classList.remove("hidden");
  // Turn the freshly inserted <i data-lucide> placeholders into icons.
  lucide.createIcons();
}
// Handle a click on a suggestion row: copy the chosen dataset ID into the
// input, close the dropdown and run the similarity lookup straight away.
function selectSuggestion(dataset) {
  const inputField = document.getElementById("datasetInput");
  const dropdown = document.getElementById("suggestionsBox");

  inputField.value = dataset;
  dropdown.classList.add("hidden");

  findSimilarDatasets();
}
// Look up datasets similar to the ID typed into #datasetInput and render them.
//
// page: 1-based page number; the request asks for RESULTS_PER_PAGE * page
//       results.
//
// Does nothing when the input is empty. Otherwise the ID is written to the
// URL ("similar" parameter), the similarity endpoint is called and the
// results are rendered; failures are logged and shown via showError().
async function findSimilarDatasets(page = 1) {
  // Surrounding whitespace (easy to pick up when pasting) is not part of an ID.
  const datasetId = document.getElementById("datasetInput").value.trim();
  if (!datasetId) return;

  // Keep the shared page counter in step with what is displayed, so that
  // "load more" after a fresh lookup continues from page 2.
  currentPage = page;

  // Update URL with similar dataset ID
  updateURL({ similar: datasetId, q: null });

  // The page may not contain a #similarLoader element, hence the null checks.
  const similarLoader = document.getElementById("similarLoader");
  if (similarLoader) {
    similarLoader.classList.remove("hidden");
  }
  document.getElementById("errorMessage").classList.add("hidden");

  try {
    const response = await fetch(
      `${API_URL}/similarity/datasets?dataset_id=${encodeURIComponent(
        datasetId
      )}&k=${RESULTS_PER_PAGE * page}`
    );
    if (!response.ok) throw new Error("Similarity search failed");
    const data = await response.json();
    displayResults(data.results, page);
  } catch (error) {
    // Log the cause as well; the on-page message alone hides it.
    console.error("Similarity search error:", error);
    showError("Failed to find similar datasets. Please try again.");
  } finally {
    if (similarLoader) {
      similarLoader.classList.add("hidden");
    }
  }
}
// Display results
// Replaces the contents of #resultsContainer with either a results list or
// a "No results found" notice.
//
// results: array of result objects (each is passed to createResultCard);
//          null/undefined/empty selects the "No results found" branch.
// page:    1-based page number that produced `results`; only used to decide
//          which footer to show.
//
// Footer rules, as coded below:
//   - "Load More Results" when results.length >= RESULTS_PER_PAGE * page and
//     one more page would still fit within MAX_RESULTS;
//   - otherwise an end-of-list panel when results.length >= MAX_RESULTS;
//   - otherwise no footer.
function displayResults(results, page = 1) {
const container = document.getElementById("resultsContainer");
// Debug trace of the raw results.
console.log("Displaying results:", results);
if (results && results.length > 0) {
// Header (count + share button), one card per result, then the footer.
container.innerHTML = `
<div class="flex justify-between items-center mb-4">
<h2 class="text-lg font-semibold">Results</h2>
<div class="flex items-center gap-4">
<span class="text-sm text-gray-500">Found ${
results.length
} results</span>
<button
onclick="shareResults()"
class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
title="Share link to these search results"
>
<i data-lucide="share-2"></i>
Share Results
</button>
</div>
</div>
${results.map((result) => createResultCard(result)).join("")}
${
results.length >= RESULTS_PER_PAGE * page &&
RESULTS_PER_PAGE * (page + 1) <= MAX_RESULTS
? `<div class="mt-4 flex items-center justify-between">
<button
onclick="loadMore()"
class="px-6 py-3 bg-gray-100 hover:bg-gray-200 text-gray-700 rounded-lg transition-colors flex items-center gap-2"
>
<i data-lucide="more-horizontal"></i>
Load More Results
</button>
<button
onclick="shareResults()"
class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
title="Share link to these search results"
>
<i data-lucide="share-2"></i>
Share Results
</button>
</div>`
: results.length >= MAX_RESULTS
? `<div class="text-center mt-4 p-6 bg-blue-50 rounded-lg">
<p class="text-gray-700 mb-3">You've reached the end of our dataset journey! (${MAX_RESULTS} results)</p>
<p class="text-gray-600 mb-4">Can't find what you're looking for? Why not create and share your own dataset?</p>
<div class="flex items-center justify-center gap-4">
<a href="https://huggingface.co/docs/datasets/upload_dataset"
target="_blank"
class="inline-flex items-center gap-2 text-blue-500 hover:text-blue-700">
<i data-lucide="external-link"></i>
Learn how to share your dataset on Hugging Face
</a>
<button
onclick="shareResults()"
class="flex items-center gap-1 text-blue-500 hover:text-blue-700"
title="Share link to these search results"
>
<i data-lucide="share-2"></i>
Share Results
</button>
</div>
</div>`
: ""
}
`;
// Turn the freshly inserted <i data-lucide> placeholders into icons.
lucide.createIcons();
} else {
// Nothing to show: replace any previous results with a plain notice.
container.innerHTML = `
<div class="text-center text-gray-500">
No results found
</div>
`;
}
}
// Put `message` into the #errorMessage banner and make the banner visible.
// The text is assigned via textContent, so it is shown literally.
function showError(message) {
  const banner = document.getElementById("errorMessage");
  banner.textContent = message;
  banner.classList.remove("hidden");
}
// Wire up the two text inputs. The block scope keeps the handler names out
// of the global namespace.
{
  // Search box: every edit triggers the (debounced) search.
  const onSearchInput = (event) => searchDatasets(event.target.value);
  document
    .getElementById("searchInput")
    .addEventListener("input", onSearchInput);

  // Dataset box: pressing Enter runs the similarity lookup.
  const onDatasetKeydown = (event) => {
    if (event.key !== "Enter") return;
    findSimilarDatasets();
  };
  document
    .getElementById("datasetInput")
    .addEventListener("keydown", onDatasetKeydown);
}
// "Find Similar" button on a result card: jump to the similar tab, fill in
// the card's dataset ID and start the lookup.
function findSimilarFromResult(datasetId) {
  switchTab("similar");

  // Assigning .value does not focus the field, so the focus handler that
  // opens the suggestion list is not triggered here.
  document.getElementById("datasetInput").value = datasetId;

  // Close the dropdown in case it was left open earlier.
  document.getElementById("suggestionsBox").classList.add("hidden");

  findSimilarDatasets();
}
// Expand or collapse the "How it works" panel and rotate its chevron:
// 90 degrees while the panel is open, 0 degrees while it is closed.
function toggleAccordion() {
  const panel = document.getElementById("accordionContent");
  const chevron = document.getElementById("accordionIcon");

  panel.classList.toggle("hidden");

  let rotation = "rotate(90deg)";
  if (panel.classList.contains("hidden")) {
    rotation = "rotate(0deg)";
  }
  chevron.style.transform = rotation;
}
// "Load More Results" button: advance the page counter and re-run whichever
// query belongs to the currently active tab with the new page number.
function loadMore() {
  currentPage += 1;

  const activeTabId = document.querySelector(".tab-trigger.active").id;
  if (activeTabId !== "searchTab") {
    findSimilarDatasets(currentPage);
    return;
  }

  const typedQuery = document.getElementById("searchInput").value;
  searchDatasets(typedQuery, currentPage);
}
// Expand or collapse the dataset preview of the card for `datasetId` and
// rotate that card's chevron: 90 degrees while open, 0 degrees while closed.
function togglePreview(datasetId) {
  const previewBody = document.getElementById("preview-content-" + datasetId);
  const chevron = document.getElementById("preview-icon-" + datasetId);

  previewBody.classList.toggle("hidden");

  let rotation = "rotate(90deg)";
  if (previewBody.classList.contains("hidden")) {
    rotation = "rotate(0deg)";
  }
  chevron.style.transform = rotation;
}
// Share a link that reproduces the current view.
//
// The link is the page URL with "q" set to the search box text (search tab
// active) or "similar" set to the dataset box text (otherwise). It is handed
// to the native share sheet when navigator.share exists; otherwise it is
// copied to the clipboard and the clicked button briefly shows "Copied!".
//
// triggerEvent: the click event, used only to find the button to give
//               feedback on. Defaults to window.event so the existing inline
//               onclick="shareResults()" handlers keep working unchanged.
//
// Failures (including a dismissed share sheet) are logged, never thrown.
async function shareResults(triggerEvent = window.event) {
  // window.event is only populated while the click is being dispatched, so
  // the button must be resolved now, before the first await. Reading it after
  // the clipboard write used to throw and the feedback never appeared.
  const clickTarget = triggerEvent && triggerEvent.target;
  const button =
    clickTarget && typeof clickTarget.closest === "function"
      ? clickTarget.closest("button")
      : null;

  const activeTab = document.querySelector(".tab-trigger.active").id;
  const currentURL = new URL(window.location);
  // Update URL based on active tab
  if (activeTab === "searchTab") {
    const searchQuery = document.getElementById("searchInput").value;
    currentURL.searchParams.set("q", searchQuery);
    currentURL.searchParams.delete("similar");
  } else {
    const datasetId = document.getElementById("datasetInput").value;
    currentURL.searchParams.set("similar", datasetId);
    currentURL.searchParams.delete("q");
  }

  try {
    if (navigator.share) {
      // Use native sharing on supported devices
      await navigator.share({
        title: "Hub Semantic Search",
        text: "Check out these dataset search results",
        url: currentURL.toString(),
      });
      return;
    }

    // Fallback to clipboard copy
    await navigator.clipboard.writeText(currentURL.toString());

    // Show temporary success message. Skip it when no button is known, or
    // when the button is already showing it (a second click would otherwise
    // save "Copied!" as the label to restore).
    if (!button || button.dataset.copied) return;
    const originalHTML = button.innerHTML;
    button.dataset.copied = "true";
    button.innerHTML = '<i data-lucide="check"></i> Copied!';
    lucide.createIcons();
    setTimeout(() => {
      button.innerHTML = originalHTML;
      delete button.dataset.copied;
      lucide.createIcons();
    }, 2000);
  } catch (error) {
    console.error("Error sharing:", error);
  }
}
// Once the document is parsed: wire the suggestion dropdown of the dataset
// input and apply any "q" / "similar" parameter the page was opened with.
document.addEventListener("DOMContentLoaded", async () => {
  const datasetInput = document.getElementById("datasetInput");
  const suggestionsBox = document.getElementById("suggestionsBox");

  // Show the fetched dataset IDs, narrowed to those containing `text`
  // (case-insensitive). An empty `text` shows the whole list.
  const showSuggestionsFor = async (text) => {
    const trending = await fetchTrendingDatasets();
    const needle = text.toLowerCase();
    const matches = needle
      ? trending.filter((dataset) => dataset.toLowerCase().includes(needle))
      : trending;
    displaySuggestions(matches, suggestionsBox);
  };

  // Typing filters the list; an emptied field shows the whole list again.
  datasetInput.addEventListener("input", (e) =>
    showSuggestionsFor(e.target.value)
  );

  // Focusing the field shows the whole list regardless of its current text.
  datasetInput.addEventListener("focus", () => showSuggestionsFor(""));

  // Close the dropdown on a click anywhere outside the field and the list.
  // A click listener is used rather than "blur": blur fires before the click
  // on a suggestion row lands and would hide the row being clicked.
  document.addEventListener("click", (e) => {
    if (e.target !== datasetInput && !suggestionsBox.contains(e.target)) {
      suggestionsBox.classList.add("hidden");
    }
  });

  // Handle initial URL parameters
  if (INITIAL_SEARCH) {
    switchTab("search");
    document.getElementById("searchInput").value = INITIAL_SEARCH;
    await searchDatasets(INITIAL_SEARCH);
  } else if (INITIAL_SIMILAR) {
    switchTab("similar");
    document.getElementById("datasetInput").value = INITIAL_SIMILAR;
    await findSimilarDatasets();
  }
});
</script>
</body>
</html>