Spaces:
Running
Running
/**
 * Main Alpine.js application for OCR Text Explorer
 */
document.addEventListener('alpine:init', () => { | |
Alpine.data('ocrExplorer', () => ({ | |
// Dataset state | |
datasetId: 'davanstrien/exams-ocr', | |
datasetConfig: 'default', | |
datasetSplit: 'train', | |
// Navigation state | |
currentIndex: 0, | |
totalSamples: null, | |
currentSample: null, | |
jumpToPage: '', | |
// UI state | |
loading: false, | |
error: null, | |
activeTab: 'comparison', | |
diffMode: 'char', | |
darkMode: false, | |
showAbout: false, | |
showFlowView: false, | |
showDock: false, | |
// Flow view state | |
flowItems: [], | |
flowStartIndex: 0, | |
flowVisibleCount: 7, | |
flowOffset: 0, | |
// Dock state | |
dockItems: [], | |
dockHideTimeout: null, | |
dockStartIndex: 0, | |
dockVisibleCount: 10, | |
// Computed diff HTML | |
diffHtml: '', | |
// Statistics | |
similarity: 0, | |
charStats: { total: 0, added: 0, removed: 0 }, | |
wordStats: { original: 0, improved: 0 }, | |
// API instance | |
api: null, | |
async init() { | |
// Initialize API | |
this.api = new DatasetAPI(); | |
// Apply dark mode from localStorage | |
this.darkMode = localStorage.getItem('darkMode') === 'true'; | |
this.$watch('darkMode', value => { | |
localStorage.setItem('darkMode', value); | |
document.documentElement.classList.toggle('dark', value); | |
}); | |
document.documentElement.classList.toggle('dark', this.darkMode); | |
// Setup keyboard navigation | |
this.setupKeyboardNavigation(); | |
// Load initial dataset | |
await this.loadDataset(); | |
}, | |
setupKeyboardNavigation() { | |
document.addEventListener('keydown', (e) => { | |
// Ignore if user is typing in input | |
if (e.target.tagName === 'INPUT') return; | |
switch(e.key) { | |
case 'ArrowLeft': | |
e.preventDefault(); | |
if (e.shiftKey && this.showDock) { | |
this.scrollDockLeft(); | |
} else { | |
this.previousSample(); | |
} | |
break; | |
case 'ArrowRight': | |
e.preventDefault(); | |
if (e.shiftKey && this.showDock) { | |
this.scrollDockRight(); | |
} else { | |
this.nextSample(); | |
} | |
break; | |
case 'k': | |
case 'K': | |
e.preventDefault(); | |
this.previousSample(); | |
break; | |
case 'j': | |
case 'J': | |
e.preventDefault(); | |
this.nextSample(); | |
break; | |
case '1': | |
this.activeTab = 'comparison'; | |
break; | |
case '2': | |
this.activeTab = 'diff'; | |
break; | |
case '3': | |
this.activeTab = 'improved'; | |
break; | |
case 'v': | |
case 'V': | |
// Toggle dock with V key | |
if (this.showDock) { | |
this.hideDockPreview(); | |
} else { | |
this.showDockPreview(); | |
} | |
break; | |
} | |
}); | |
}, | |
async loadDataset() { | |
this.loading = true; | |
this.error = null; | |
try { | |
// Validate dataset | |
await this.api.validateDataset(this.datasetId); | |
// Get dataset info | |
const info = await this.api.getDatasetInfo(this.datasetId); | |
this.datasetConfig = info.defaultConfig; | |
this.datasetSplit = info.defaultSplit; | |
// Get total rows | |
this.totalSamples = await this.api.getTotalRows( | |
this.datasetId, | |
this.datasetConfig, | |
this.datasetSplit | |
); | |
// Load first sample | |
this.currentIndex = 0; | |
await this.loadSample(0); | |
} catch (error) { | |
this.error = error.message; | |
} finally { | |
this.loading = false; | |
} | |
}, | |
async loadSample(index) { | |
try { | |
const data = await this.api.getRow( | |
this.datasetId, | |
this.datasetConfig, | |
this.datasetSplit, | |
index | |
); | |
this.currentSample = data.row; | |
this.currentIndex = index; | |
// Update diff when sample changes | |
this.updateDiff(); | |
// Update URL without triggering navigation | |
const url = new URL(window.location); | |
url.searchParams.set('dataset', this.datasetId); | |
url.searchParams.set('index', index); | |
window.history.replaceState({}, '', url); | |
} catch (error) { | |
this.error = `Failed to load sample: ${error.message}`; | |
} | |
}, | |
async nextSample() { | |
if (this.currentIndex < this.totalSamples - 1) { | |
await this.loadSample(this.currentIndex + 1); | |
} | |
}, | |
async previousSample() { | |
if (this.currentIndex > 0) { | |
await this.loadSample(this.currentIndex - 1); | |
} | |
}, | |
async jumpToSample() { | |
const pageNum = parseInt(this.jumpToPage); | |
if (!isNaN(pageNum) && pageNum >= 1 && pageNum <= this.totalSamples) { | |
// Convert 1-based page number to 0-based index | |
await this.loadSample(pageNum - 1); | |
// Clear the input after jumping | |
this.jumpToPage = ''; | |
} else { | |
// Show error or just reset | |
this.jumpToPage = ''; | |
} | |
}, | |
getOriginalText() { | |
if (!this.currentSample) return ''; | |
const columns = this.api.detectColumns(null, this.currentSample); | |
return this.currentSample[columns.originalText] || 'No original text found'; | |
}, | |
getImprovedText() { | |
if (!this.currentSample) return ''; | |
const columns = this.api.detectColumns(null, this.currentSample); | |
return this.currentSample[columns.improvedText] || 'No improved text found'; | |
}, | |
getImageData() { | |
if (!this.currentSample) return null; | |
const columns = this.api.detectColumns(null, this.currentSample); | |
return columns.image ? this.currentSample[columns.image] : null; | |
}, | |
getImageSrc() { | |
const imageData = this.getImageData(); | |
return imageData?.src || ''; | |
}, | |
getImageDimensions() { | |
const imageData = this.getImageData(); | |
if (imageData?.width && imageData?.height) { | |
return `${imageData.width}×${imageData.height}`; | |
} | |
return null; | |
}, | |
updateDiff() { | |
const original = this.getOriginalText(); | |
const improved = this.getImprovedText(); | |
// Calculate statistics | |
this.calculateStatistics(original, improved); | |
// Use diff utility based on mode | |
switch(this.diffMode) { | |
case 'char': | |
this.diffHtml = createCharacterDiff(original, improved); | |
break; | |
case 'word': | |
this.diffHtml = createWordDiff(original, improved); | |
break; | |
case 'line': | |
this.diffHtml = createLineDiff(original, improved); | |
break; | |
} | |
}, | |
calculateStatistics(original, improved) { | |
// Calculate similarity | |
this.similarity = calculateSimilarity(original, improved); | |
// Character statistics | |
const charDiff = this.getCharacterDiffStats(original, improved); | |
this.charStats = charDiff; | |
// Word statistics | |
const originalWords = original.split(/\s+/).filter(w => w.length > 0); | |
const improvedWords = improved.split(/\s+/).filter(w => w.length > 0); | |
this.wordStats = { | |
original: originalWords.length, | |
improved: improvedWords.length | |
}; | |
}, | |
getCharacterDiffStats(original, improved) { | |
const dp = computeLCS(original, improved); | |
const diff = buildDiff(original, improved, dp); | |
let added = 0; | |
let removed = 0; | |
let unchanged = 0; | |
for (const part of diff) { | |
if (part.type === 'insert') { | |
added += part.value.length; | |
} else if (part.type === 'delete') { | |
removed += part.value.length; | |
} else { | |
unchanged += part.value.length; | |
} | |
} | |
return { | |
total: original.length, | |
added: added, | |
removed: removed, | |
unchanged: unchanged | |
}; | |
}, | |
async handleImageError(event) { | |
// Try to refresh the image URL | |
console.log('Image failed to load, refreshing URL...'); | |
try { | |
const data = await this.api.refreshImageUrl( | |
this.datasetId, | |
this.datasetConfig, | |
this.datasetSplit, | |
this.currentIndex | |
); | |
// Update the image source | |
if (data.row && data.row[this.api.detectColumns(null, data.row).image]?.src) { | |
event.target.src = data.row[this.api.detectColumns(null, data.row).image].src; | |
} | |
} catch (error) { | |
console.error('Failed to refresh image URL:', error); | |
// Set a placeholder image | |
event.target.src = ''; | |
} | |
}, | |
exportComparison() { | |
const original = this.getOriginalText(); | |
const improved = this.getImprovedText(); | |
const metadata = { | |
dataset: this.datasetId, | |
page: this.currentIndex + 1, | |
totalPages: this.totalSamples, | |
exportDate: new Date().toISOString(), | |
similarity: `${this.similarity}%`, | |
statistics: { | |
characters: this.charStats, | |
words: this.wordStats | |
} | |
}; | |
// Create export content | |
let content = `OCR Text Comparison Export\n`; | |
content += `==========================\n\n`; | |
content += `Dataset: ${metadata.dataset}\n`; | |
content += `Page: ${metadata.page} of ${metadata.totalPages}\n`; | |
content += `Export Date: ${new Date().toLocaleString()}\n`; | |
content += `Similarity: ${metadata.similarity}\n`; | |
content += `Characters: ${metadata.statistics.characters.total} total, `; | |
content += `${metadata.statistics.characters.added} added, `; | |
content += `${metadata.statistics.characters.removed} removed\n`; | |
content += `Words: ${metadata.statistics.words.original} → ${metadata.statistics.words.improved}\n`; | |
content += `\n${'='.repeat(50)}\n\n`; | |
content += `ORIGINAL OCR:\n`; | |
content += `${'='.repeat(50)}\n`; | |
content += original; | |
content += `\n\n${'='.repeat(50)}\n\n`; | |
content += `IMPROVED OCR:\n`; | |
content += `${'='.repeat(50)}\n`; | |
content += improved; | |
// Download file | |
const blob = new Blob([content], { type: 'text/plain' }); | |
const url = URL.createObjectURL(blob); | |
const a = document.createElement('a'); | |
a.href = url; | |
a.download = `ocr-comparison-${this.datasetId.replace('/', '-')}-page-${this.currentIndex + 1}.txt`; | |
document.body.appendChild(a); | |
a.click(); | |
document.body.removeChild(a); | |
URL.revokeObjectURL(url); | |
}, | |
// Flow view methods | |
async toggleFlowView() { | |
this.showFlowView = !this.showFlowView; | |
if (this.showFlowView) { | |
// Reset to center around current page when opening | |
this.flowStartIndex = Math.max(0, this.currentIndex - Math.floor(this.flowVisibleCount / 2)); | |
await this.loadFlowItems(); | |
} | |
}, | |
async loadFlowItems() { | |
// Load thumbnails from flowStartIndex | |
const startIdx = this.flowStartIndex; | |
this.flowItems = []; | |
// Load visible items | |
for (let i = 0; i < this.flowVisibleCount && (startIdx + i) < this.totalSamples; i++) { | |
const idx = startIdx + i; | |
try { | |
const data = await this.api.getRow( | |
this.datasetId, | |
this.datasetConfig, | |
this.datasetSplit, | |
idx | |
); | |
const columns = this.api.detectColumns(null, data.row); | |
const imageData = columns.image ? data.row[columns.image] : null; | |
this.flowItems.push({ | |
index: idx, | |
imageSrc: imageData?.src || '', | |
row: data.row | |
}); | |
} catch (error) { | |
console.error(`Failed to load flow item ${idx}:`, error); | |
} | |
} | |
}, | |
scrollFlowLeft() { | |
if (this.flowStartIndex > 0) { | |
this.flowStartIndex = Math.max(0, this.flowStartIndex - this.flowVisibleCount); | |
this.loadFlowItems(); | |
} | |
}, | |
scrollFlowRight() { | |
if (this.flowStartIndex < this.totalSamples - this.flowVisibleCount) { | |
this.flowStartIndex = Math.min( | |
this.totalSamples - this.flowVisibleCount, | |
this.flowStartIndex + this.flowVisibleCount | |
); | |
this.loadFlowItems(); | |
} | |
}, | |
async jumpToFlowPage(index) { | |
this.showFlowView = false; | |
await this.loadSample(index); | |
}, | |
async handleFlowImageError(event, index) { | |
// Try to refresh the image URL for flow item | |
try { | |
const data = await this.api.refreshImageUrl( | |
this.datasetId, | |
this.datasetConfig, | |
this.datasetSplit, | |
index | |
); | |
if (data.row) { | |
const columns = this.api.detectColumns(null, data.row); | |
const imageData = columns.image ? data.row[columns.image] : null; | |
if (imageData?.src) { | |
event.target.src = imageData.src; | |
// Update the flow item | |
const flowItem = this.flowItems.find(item => item.index === index); | |
if (flowItem) { | |
flowItem.imageSrc = imageData.src; | |
} | |
} | |
} | |
} catch (error) { | |
console.error('Failed to refresh flow image URL:', error); | |
} | |
}, | |
// Dock methods | |
async showDockPreview() { | |
// Clear any hide timeout | |
if (this.dockHideTimeout) { | |
clearTimeout(this.dockHideTimeout); | |
this.dockHideTimeout = null; | |
} | |
this.showDock = true; | |
// Center dock around current page | |
this.dockStartIndex = Math.max(0, | |
Math.min( | |
this.currentIndex - Math.floor(this.dockVisibleCount / 2), | |
this.totalSamples - this.dockVisibleCount | |
) | |
); | |
// Always reload dock items to show current position | |
await this.loadDockItems(); | |
}, | |
hideDockPreview() { | |
// Add a small delay to prevent flickering | |
this.dockHideTimeout = setTimeout(() => { | |
this.showDock = false; | |
}, 300); | |
}, | |
async loadDockItems() { | |
// Load thumbnails based on dock start index | |
const endIdx = Math.min(this.totalSamples, this.dockStartIndex + this.dockVisibleCount); | |
this.dockItems = []; | |
for (let i = this.dockStartIndex; i < endIdx; i++) { | |
try { | |
const data = await this.api.getRow( | |
this.datasetId, | |
this.datasetConfig, | |
this.datasetSplit, | |
i | |
); | |
const columns = this.api.detectColumns(null, data.row); | |
const imageData = columns.image ? data.row[columns.image] : null; | |
this.dockItems.push({ | |
index: i, | |
imageSrc: imageData?.src || '', | |
row: data.row | |
}); | |
} catch (error) { | |
console.error(`Failed to load dock item ${i}:`, error); | |
} | |
} | |
}, | |
async scrollDockLeft() { | |
if (this.dockStartIndex > 0) { | |
this.dockStartIndex = Math.max(0, this.dockStartIndex - Math.floor(this.dockVisibleCount / 2)); | |
await this.loadDockItems(); | |
} | |
}, | |
async scrollDockRight() { | |
if (this.dockStartIndex < this.totalSamples - this.dockVisibleCount) { | |
this.dockStartIndex = Math.min( | |
this.totalSamples - this.dockVisibleCount, | |
this.dockStartIndex + Math.floor(this.dockVisibleCount / 2) | |
); | |
await this.loadDockItems(); | |
} | |
}, | |
async jumpToDockPage(index) { | |
this.showDock = false; | |
await this.loadSample(index); | |
}, | |
// Watch for diff mode changes | |
initWatchers() { | |
this.$watch('diffMode', () => this.updateDiff()); | |
this.$watch('currentSample', () => this.updateDiff()); | |
} | |
})); | |
}); | |
// Once Alpine has finished initializing, ask the 'ocrExplorer' store (if one
// exists) to register its watchers.
// NOTE(review): the component above is registered with Alpine.data(), not
// Alpine.store(); unless a store with this name is registered elsewhere, this
// lookup presumably yields nothing and the call is skipped - confirm.
document.addEventListener('alpine:initialized', () => {
  const explorer = Alpine.store('ocrExplorer');
  explorer?.initWatchers?.();
});