davanstrien's picture
davanstrien HF staff
static version
4edfecf
raw
history blame
18.4 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Hub Semantic Search</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://unpkg.com/lucide@latest"></script>
  <!--
    The inline script at the bottom of the page calls `_.debounce`, so lodash
    must load successfully. The package specifier had been mangled into
    "[email protected]" by an e-mail obfuscation filter, which made this URL
    invalid. NOTE(review): 4.17.21 is assumed to be the intended version;
    confirm against the original deployment.
  -->
  <script src="https://cdn.jsdelivr.net/npm/lodash@4.17.21/lodash.min.js"></script>
</head>
<body>
<div class="w-full max-w-4xl mx-auto p-4 space-y-8">
<h1 class="text-3xl font-bold text-gray-800">Hub Semantic Search</h1>
<div
class="bg-gradient-to-br from-blue-50 to-indigo-50 p-8 rounded-xl shadow-sm border border-blue-100 mb-8"
>
<h2
class="text-xl font-semibold mb-4 text-gray-800 flex items-center gap-2"
>
<i data-lucide="search" class="text-blue-500"></i>
Welcome to Hub Semantic Search
</h2>
<p class="text-gray-700 mb-4">
Find and explore the 🤗 Hub via semantic search on LLM-generated
summaries!
</p>
<div class="bg-blue-100 text-blue-800 px-4 py-2 rounded-md mb-4">
<p class="flex items-center gap-2">
<i data-lucide="info"></i> Currently supporting dataset search only.
Model search coming soon!
</p>
</div>
<button
onclick="toggleAccordion()"
id="accordionButton"
class="text-blue-500 hover:text-blue-700 flex items-center gap-2 mb-4"
>
<i
data-lucide="chevron-right"
id="accordionIcon"
class="transition-transform"
></i>
<span>How it works</span>
</button>
<div id="accordionContent" class="hidden">
<ul class="list-disc list-inside space-y-2 text-gray-600 ml-4">
<li>
<strong>AI-Generated Summaries:</strong> Each dataset is indexed
using a concise, one-sentence summary generated by a large
language model trained on thousands of Hugging Face dataset cards
</li>
<li>
<strong>Semantic Search:</strong> Enter keywords or descriptions
to find semantically similar resources based on these AI-generated
summaries
</li>
<li>
<strong>Find Similar:</strong> Enter a dataset ID (e.g.,
"airtrain-ai/fineweb-edu-fortified") to discover related resources
using semantic matching
</li>
</ul>
</div>
</div>
<!--
  Tabbed search UI. The element ids and the `tab-trigger`, `tab-content`,
  `active` and `hidden` classes used here are looked up by the inline script,
  so they must stay in sync with it. Each tab button id is `<name>Tab` and its
  panel id is `<name>Content`, because switchTab() derives both from the name.
-->
<div class="tabs w-full">
  <div class="tab-list flex gap-2 border-b mb-6">
    <button
      type="button"
      onclick="switchTab('search')"
      id="searchTab"
      class="tab-trigger active px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors"
    >
      <i data-lucide="search"></i> Search
    </button>
    <button
      type="button"
      onclick="switchTab('similar')"
      id="similarTab"
      class="tab-trigger px-6 py-3 flex items-center gap-2 border-b-2 border-transparent hover:bg-gray-50 transition-colors"
    >
      <i data-lucide="arrow-right"></i> Find Similar
    </button>
  </div>

  <!-- Keyword search panel: results update while the user types. -->
  <div id="searchContent" class="tab-content space-y-4">
    <div
      class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100"
    >
      <p class="text-gray-600 mb-4">
        Enter keywords to search through dataset descriptions. The search
        will automatically update as you type.
      </p>
      <div class="relative">
        <!-- The placeholder is only a hint; aria-label gives the field its accessible name. -->
        <input
          type="text"
          id="searchInput"
          aria-label="Search datasets by keyword"
          placeholder="Type to search (minimum 3 characters)..."
          class="w-full p-3 border rounded-lg pr-10 focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
        />
        <div id="searchLoader" class="hidden absolute right-3 top-2">
          <i data-lucide="loader-2" class="animate-spin"></i>
        </div>
      </div>
    </div>
  </div>

  <!-- "Find Similar" panel: looks up datasets related to a given dataset ID. -->
  <div id="similarContent" class="hidden tab-content space-y-4">
    <div
      class="card bg-white p-8 rounded-xl shadow-sm border border-gray-100"
    >
      <p class="text-gray-600 mb-4">
        Enter a dataset ID to find similar datasets. You can also click
        "Find Similar" on any search result to quickly find related
        datasets.
      </p>
      <div class="flex gap-3">
        <input
          type="text"
          id="datasetInput"
          aria-label="Dataset ID"
          placeholder="Enter dataset ID..."
          class="w-full p-3 border rounded-lg focus:ring-2 focus:ring-blue-100 focus:border-blue-300 transition-all outline-none"
        />
        <button
          type="button"
          onclick="findSimilarDatasets()"
          class="px-6 py-3 bg-blue-500 hover:bg-blue-600 text-white rounded-lg transition-colors flex items-center gap-2"
        >
          <i data-lucide="search"></i>
          Search
        </button>
        <div id="similarLoader" class="hidden">
          <i data-lucide="loader-2" class="animate-spin"></i>
        </div>
      </div>
    </div>
  </div>

  <!-- role="alert" lets assistive technology announce the message when the script fills and reveals this box. -->
  <div
    id="errorMessage"
    role="alert"
    class="hidden mt-4 p-4 text-red-600 bg-red-50 rounded-md"
  ></div>
  <div id="resultsContainer" class="mt-6 space-y-4"></div>
</div>
</div>
<style>
/* Highlights the selected tab button; switchTab() in the inline script adds
   and removes the `active` class on the `.tab-trigger` buttons. */
.tab-trigger.active {
border-bottom-color: #3b82f6;
color: #3b82f6;
}
</style>
<script>
// Configuration
// Base URL of the search backend; the /search/datasets and
// /similarity/datasets endpoints are requested relative to it.
const API_URL =
"https://davanstrien-huggingface-datasets-search-v2.hf.space";
// Keyword queries shorter than this are not sent; the result list is cleared instead.
const MIN_SEARCH_LENGTH = 3;
// Wait time in milliseconds passed to _.debounce for the keyword search.
const DEBOUNCE_MS = 300;
// Page size: each request asks for RESULTS_PER_PAGE * page results (the `k` query parameter).
const RESULTS_PER_PAGE = 5;
// Upper bound on results: "Load More" is not offered once the next page would exceed it.
const MAX_RESULTS = 100;
// Page counter shared by both tabs; switchTab() resets it to 1 and loadMore() increments it.
let currentPage = 1;
// Initialize Lucide icons
// This first call covers the static markup; displayResults() calls it again
// after injecting result cards.
lucide.createIcons();
// Tab switching
/**
 * Shows the panel and highlights the button belonging to `tabId`, hiding and
 * de-highlighting all others. Also restarts pagination at page 1.
 *
 * @param {string} tabId - Tab name; the elements `${tabId}Content` and
 *   `${tabId}Tab` are looked up by id.
 */
function switchTab(tabId) {
  currentPage = 1;

  for (const panel of document.querySelectorAll(".tab-content")) {
    panel.classList.add("hidden");
  }
  for (const trigger of document.querySelectorAll(".tab-trigger")) {
    trigger.classList.remove("active");
  }

  const selectedPanel = document.getElementById(`${tabId}Content`);
  selectedPanel.classList.remove("hidden");
  const selectedTrigger = document.getElementById(`${tabId}Tab`);
  selectedTrigger.classList.add("active");
}
// Create result card
/**
 * Builds the HTML for one result card and starts the asynchronous check that
 * reveals the card's preview section when a dataset viewer is available.
 *
 * All values taken from `result` come from the search API and are treated as
 * untrusted: each one is escaped for the context it is written into (HTML
 * text, quoted attribute, inline-handler JavaScript string, or URL path).
 *
 * @param {Object} result - One entry of the API response's `results` array.
 *   The fields read here are `dataset_id`, `likes`, `downloads`,
 *   `similarity` and `summary`.
 * @returns {string} HTML markup for the card.
 */
function createResultCard(result) {
  // Escapes the characters that are significant in HTML text and in quoted
  // attribute values.
  const escapeHtml = (value) =>
    String(value).replace(
      /[&<>"']/g,
      (ch) =>
        ({
          "&": "&amp;",
          "<": "&lt;",
          ">": "&gt;",
          '"': "&quot;",
          "'": "&#39;",
        }[ch])
    );

  const datasetId = String(result.dataset_id);
  // For HTML text and for id="..." attributes (the browser decodes the
  // entities, so getElementById with the raw id still matches).
  const idHtml = escapeHtml(datasetId);
  // For inline handlers: first a JavaScript string literal, then
  // attribute-escaped so the surrounding onclick="..." stays intact.
  const idJsArg = escapeHtml(JSON.stringify(datasetId));
  // For URLs: encode each path segment but keep the "/" between them.
  const idPath = escapeHtml(
    datasetId.split("/").map(encodeURIComponent).join("/")
  );

  const cardHtml = `
    <div class="card bg-white p-6 rounded-lg shadow hover:shadow-md transition-shadow">
      <div class="flex items-start justify-between">
        <div class="space-y-2 w-full">
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-2">
              <i data-lucide="database" class="text-blue-500"></i>
              <h3 class="text-lg font-semibold">${idHtml}</h3>
            </div>
            <div class="flex items-center gap-2">
              <div class="flex items-center gap-4 text-sm text-gray-500 mr-4">
                <span class="flex items-center gap-1">
                  <i data-lucide="heart" class="w-4 h-4"></i>
                  ${escapeHtml(result.likes)}
                </span>
                <span class="flex items-center gap-1">
                  <i data-lucide="download" class="w-4 h-4"></i>
                  ${escapeHtml(result.downloads)}
                </span>
              </div>
              <span class="bg-blue-50 px-2 py-1 rounded text-sm">
                ${(result.similarity * 100).toFixed(1)}% match
              </span>
              <button
                type="button"
                onclick="findSimilarFromResult(${idJsArg})"
                class="flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700"
              >
                <i data-lucide="arrow-right"></i>
                Find Similar
              </button>
            </div>
          </div>
          <p class="text-sm text-gray-600">${escapeHtml(result.summary)}</p>
          <!-- Add preview section that starts hidden -->
          <div id="preview-section-${idHtml}" class="mt-4 border-t pt-4 hidden">
            <button
              type="button"
              onclick="togglePreview(${idJsArg})"
              class="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-800"
            >
              <i data-lucide="chevron-right" id="preview-icon-${idHtml}" class="transition-transform"></i>
              Preview Dataset
            </button>
            <div id="preview-content-${idHtml}" class="hidden mt-4">
              <iframe
                src="https://huggingface.co/datasets/${idPath}/embed/viewer/default/train"
                title="Dataset viewer preview for ${idHtml}"
                frameborder="0"
                width="100%"
                height="560px"
              ></iframe>
            </div>
          </div>
          <a href="https://huggingface.co/datasets/${idPath}"
            target="_blank"
            rel="noopener noreferrer"
            class="inline-flex items-center gap-1 text-sm text-blue-500 hover:text-blue-700 mt-2">
            <i data-lucide="external-link" class="w-4 h-4"></i>
            View on Hugging Face Hub
          </a>
        </div>
      </div>
    </div>
  `;
  // The preview section starts hidden. The check is asynchronous and only
  // looks the section up by id after its request has completed, so it is
  // started here even though the markup has not been inserted yet.
  checkDatasetValidity(result.dataset_id);
  return cardHtml;
}
// Add function to check dataset validity
/**
 * Asks the Hugging Face datasets server whether `datasetId` has a working
 * viewer and, if the response's `viewer` field is truthy, un-hides the
 * element with id `preview-section-<datasetId>` (when it exists).
 *
 * This is best-effort: a non-2xx response leaves the preview hidden, and
 * network or parsing failures are logged to the console and otherwise
 * ignored.
 *
 * @param {string} datasetId - Dataset identifier, e.g. "owner/name".
 * @returns {Promise<void>}
 */
async function checkDatasetValidity(datasetId) {
  try {
    // Encode the id so reserved characters cannot break or extend the query string.
    const response = await fetch(
      `https://datasets-server.huggingface.co/is-valid?dataset=${encodeURIComponent(
        datasetId
      )}`
    );
    // An error status means there is nothing to preview; keep the section hidden.
    if (!response.ok) return;

    const data = await response.json();
    // Show preview section only if viewer is available
    if (!(data && data.viewer)) return;

    const previewSection = document.getElementById(
      `preview-section-${datasetId}`
    );
    // The card may have been replaced by a newer render in the meantime.
    if (previewSection) {
      previewSection.classList.remove("hidden");
    }
  } catch (error) {
    console.error(
      `Failed to check validity for dataset ${datasetId}:`,
      error
    );
  }
}
// Search datasets
// Sequence number of the most recently started keyword search. A response is
// only rendered if its own number still matches, so a slow reply to an older
// query cannot overwrite the results of a newer one.
let latestSearchRequest = 0;

/**
 * Debounced keyword search (waits DEBOUNCE_MS after the last call).
 *
 * Queries shorter than MIN_SEARCH_LENGTH after trimming clear the result
 * list instead of hitting the API. Otherwise the first
 * RESULTS_PER_PAGE * page results are requested and handed to
 * displayResults(); failures are logged and reported through showError().
 *
 * @param {string} query - Text typed by the user.
 * @param {number} [page=1] - 1-based page; determines how many results are requested.
 */
const searchDatasets = _.debounce(async (query, page = 1) => {
  const requestId = ++latestSearchRequest;
  const isLatest = () => requestId === latestSearchRequest;
  const loader = document.getElementById("searchLoader");
  const trimmedQuery = query.trim();

  // Keep the shared page counter in step with what is displayed, so that a
  // fresh search followed by "Load More" continues from page 2.
  currentPage = page;
  document.getElementById("errorMessage").classList.add("hidden");

  if (trimmedQuery.length < MIN_SEARCH_LENGTH) {
    document.getElementById("resultsContainer").innerHTML = "";
    // Any request still in flight is now superseded and will not hide the loader itself.
    loader.classList.add("hidden");
    return;
  }

  loader.classList.remove("hidden");
  try {
    const response = await fetch(
      `${API_URL}/search/datasets?query=${encodeURIComponent(
        trimmedQuery
      )}&k=${RESULTS_PER_PAGE * page}`
    );
    if (!response.ok) throw new Error("Search failed");
    const data = await response.json();
    // A newer search started while this one was in flight: drop the stale data.
    if (!isLatest()) return;
    displayResults(data.results, page);
  } catch (error) {
    console.error("Search error:", error);
    if (isLatest()) {
      showError("Failed to perform search. Please try again.");
    }
  } finally {
    // Only the newest request owns the loader.
    if (isLatest()) {
      loader.classList.add("hidden");
    }
  }
}, DEBOUNCE_MS);
// Find similar datasets
/**
 * Looks up datasets similar to the id typed into #datasetInput and renders
 * them via displayResults(). Does nothing when the field is empty or
 * contains only whitespace. Failures are logged and reported through
 * showError().
 *
 * @param {number} [page=1] - 1-based page; RESULTS_PER_PAGE * page results are requested.
 * @returns {Promise<void>}
 */
async function findSimilarDatasets(page = 1) {
  // Trim so an id pasted with surrounding whitespace still matches.
  const datasetId = document.getElementById("datasetInput").value.trim();
  if (!datasetId) return;

  // Keep the shared page counter in step with what is displayed, so that a
  // fresh lookup followed by "Load More" continues from page 2.
  currentPage = page;

  const loader = document.getElementById("similarLoader");
  loader.classList.remove("hidden");
  document.getElementById("errorMessage").classList.add("hidden");
  try {
    const response = await fetch(
      `${API_URL}/similarity/datasets?dataset_id=${encodeURIComponent(
        datasetId
      )}&k=${RESULTS_PER_PAGE * page}`
    );
    if (!response.ok) throw new Error("Similarity search failed");
    const data = await response.json();
    displayResults(data.results, page);
  } catch (error) {
    // Log the cause as the keyword search does, instead of discarding it.
    console.error("Similarity search error:", error);
    showError("Failed to find similar datasets. Please try again.");
  } finally {
    loader.classList.add("hidden");
  }
}
// Display results
/**
 * Renders a result list into #resultsContainer, replacing its previous
 * content.
 *
 * Below the cards one of three footers is shown:
 *  - a "Load More Results" button when the response filled the requested
 *    page size and the next page would stay within MAX_RESULTS;
 *  - an end-of-list notice when MAX_RESULTS results are displayed;
 *  - nothing otherwise.
 * A missing or empty `results` value renders a "No results found" message.
 *
 * @param {Array<Object>} results - Result objects passed to createResultCard().
 * @param {number} [page=1] - 1-based page that produced `results`.
 */
function displayResults(results, page = 1) {
  const container = document.getElementById("resultsContainer");

  if (!results || results.length === 0) {
    container.innerHTML = `
      <div class="text-center text-gray-500">
        No results found
      </div>
    `;
    return;
  }

  const count = results.length;
  const cardsHtml = results.map((result) => createResultCard(result)).join("");

  const canLoadMore =
    count >= RESULTS_PER_PAGE * page &&
    RESULTS_PER_PAGE * (page + 1) <= MAX_RESULTS;

  let footerHtml = "";
  if (canLoadMore) {
    footerHtml = `<button
        type="button"
        onclick="loadMore()"
        class="w-full mt-4 px-6 py-3 bg-gray-100 hover:bg-gray-200 text-gray-700 rounded-lg transition-colors flex items-center gap-2 justify-center"
      >
        <i data-lucide="more-horizontal"></i>
        Load More Results
      </button>`;
  } else if (count >= MAX_RESULTS) {
    footerHtml = `<div class="text-center mt-4 p-6 bg-blue-50 rounded-lg">
        <p class="text-gray-700 mb-3">🎉 You've reached the end of our dataset journey! (${MAX_RESULTS} results)</p>
        <p class="text-gray-600 mb-4">Can't find what you're looking for? Why not create and share your own dataset?</p>
        <a href="https://huggingface.co/docs/datasets/upload_dataset"
          target="_blank"
          rel="noopener noreferrer"
          class="inline-flex items-center gap-2 text-blue-500 hover:text-blue-700">
          <i data-lucide="external-link"></i>
          Learn how to share your dataset on Hugging Face
        </a>
      </div>`;
  }

  container.innerHTML = `
    <div class="flex justify-between items-center">
      <h2 class="text-lg font-semibold">Results</h2>
      <span class="text-sm text-gray-500">Found ${count} result${
        count === 1 ? "" : "s"
      }</span>
    </div>
    ${cardsHtml}
    ${footerHtml}
  `;
  // The injected markup contains fresh <i data-lucide> placeholders.
  lucide.createIcons();
}
// Show error message
/**
 * Writes `message` into the #errorMessage box as plain text and makes the
 * box visible.
 *
 * @param {string} message - Text to display.
 */
function showError(message) {
  const errorBox = document.getElementById("errorMessage");
  errorBox.textContent = message;
  errorBox.classList.remove("hidden");
}
// Event listeners
// Every edit of the keyword field triggers the (debounced) search.
document.getElementById("searchInput").addEventListener("input", (event) => {
  searchDatasets(event.target.value);
});

// Pressing Enter in the dataset-id field starts the similarity lookup.
document.getElementById("datasetInput").addEventListener("keydown", (event) => {
  if (event.key !== "Enter") return;
  findSimilarDatasets();
});
// Handler for the "Find Similar" button on a result card
/**
 * Switches to the "similar" tab, fills the dataset-id field with
 * `datasetId` and runs the similarity lookup.
 *
 * @param {string} datasetId - Id of the dataset whose neighbours to show.
 */
function findSimilarFromResult(datasetId) {
  switchTab("similar");
  document.getElementById("datasetInput").value = datasetId;
  findSimilarDatasets();
}
// Accordion ("How it works") toggle
/**
 * Shows or hides #accordionContent and rotates #accordionIcon to match:
 * 90 degrees while the content is visible, 0 degrees while it is hidden.
 */
function toggleAccordion() {
  const panel = document.getElementById("accordionContent");
  const chevron = document.getElementById("accordionIcon");
  // classList.toggle returns true when the class is present afterwards.
  const nowHidden = panel.classList.toggle("hidden");
  chevron.style.transform = nowHidden ? "rotate(0deg)" : "rotate(90deg)";
}
// "Load More Results" handler
/**
 * Advances the shared page counter and re-runs whichever query belongs to
 * the active tab with the new page number.
 */
function loadMore() {
  currentPage += 1;
  const activeTabId = document.querySelector(".tab-trigger.active").id;

  if (activeTabId !== "searchTab") {
    findSimilarDatasets(currentPage);
    return;
  }

  const typedQuery = document.getElementById("searchInput").value;
  searchDatasets(typedQuery, currentPage);
}
// Preview toggle for a result card
/**
 * Shows or hides the preview content of the card for `datasetId` and
 * rotates its chevron icon to match: 90 degrees while visible, 0 degrees
 * while hidden.
 *
 * @param {string} datasetId - Id used in the `preview-content-` and
 *   `preview-icon-` element ids.
 */
function togglePreview(datasetId) {
  const previewBody = document.getElementById(`preview-content-${datasetId}`);
  const chevron = document.getElementById(`preview-icon-${datasetId}`);
  // classList.toggle returns true when the class is present afterwards.
  const nowHidden = previewBody.classList.toggle("hidden");
  chevron.style.transform = nowHidden ? "rotate(0deg)" : "rotate(90deg)";
}
</script>
</body>
</html>