davanstrien's picture
davanstrien HF Staff
Configure OCR Time Capsule with default dataset and branding
c49cb47
raw
history blame
20.4 kB
/**
* Main Alpine.js application for OCR Text Explorer
*/
document.addEventListener('alpine:init', () => {
Alpine.data('ocrExplorer', () => ({
// ---- Dataset state ----
// Hugging Face dataset identifier passed to every DatasetAPI call.
datasetId: 'davanstrien/exams-ocr',
// Config and split; both are overwritten in loadDataset() with the
// defaults reported by api.getDatasetInfo().
datasetConfig: 'default',
datasetSplit: 'train',
// ---- Navigation state ----
// 0-based index of the row currently displayed.
currentIndex: 0,
// Row count returned by api.getTotalRows(); null until loadDataset() succeeds.
totalSamples: null,
// The `row` object of the last successful api.getRow() call.
currentSample: null,
// Raw 1-based page number typed by the user; parsed and cleared by jumpToSample().
jumpToPage: '',
// ---- UI state ----
// True while loadDataset() is running.
loading: false,
// Last error message to display, or null.
error: null,
// One of 'comparison' | 'diff' | 'improved' (see the 1/2/3 keyboard shortcuts).
activeTab: 'comparison',
// One of 'char' | 'word' | 'line'; selects the diff helper used by updateDiff().
diffMode: 'char',
// Mirrored to localStorage and to the `dark` class on <html> in init().
darkMode: false,
showAbout: false,
showFlowView: false,
showDock: false,
// ---- Flow view state ----
// Thumbnails currently loaded for the flow view: { index, imageSrc, row }.
flowItems: [],
// Index of the first row shown in the flow view.
flowStartIndex: 0,
// Number of thumbnails loaded per flow page.
flowVisibleCount: 7,
// NOTE(review): not read or written by any method in this file — presumably
// consumed by the template; confirm before removing.
flowOffset: 0,
// ---- Dock state ----
// Thumbnails currently loaded for the dock: { index, imageSrc, row }.
dockItems: [],
// Timer id of the pending delayed hide scheduled by hideDockPreview(), or null.
dockHideTimeout: null,
// Index of the first row shown in the dock.
dockStartIndex: 0,
// Number of thumbnails loaded into the dock at once.
dockVisibleCount: 10,
// ---- Diff output ----
// Markup produced by the create*Diff helper selected by diffMode.
diffHtml: '',
// ---- Statistics (filled in by calculateStatistics()) ----
// Value returned by calculateSimilarity(); exported with a '%' suffix.
similarity: 0,
// Character counts; getCharacterDiffStats() also adds an `unchanged` field.
charStats: { total: 0, added: 0, removed: 0 },
// Whitespace-separated word counts of the original and improved text.
wordStats: { original: 0, improved: 0 },
// ---- API instance ----
// DatasetAPI instance created in init().
api: null,
async init() {
// Initialize API
this.api = new DatasetAPI();
// Apply dark mode from localStorage
this.darkMode = localStorage.getItem('darkMode') === 'true';
this.$watch('darkMode', value => {
localStorage.setItem('darkMode', value);
document.documentElement.classList.toggle('dark', value);
});
document.documentElement.classList.toggle('dark', this.darkMode);
// Setup keyboard navigation
this.setupKeyboardNavigation();
// Load initial dataset
await this.loadDataset();
},
setupKeyboardNavigation() {
document.addEventListener('keydown', (e) => {
// Ignore if user is typing in input
if (e.target.tagName === 'INPUT') return;
switch(e.key) {
case 'ArrowLeft':
e.preventDefault();
if (e.shiftKey && this.showDock) {
this.scrollDockLeft();
} else {
this.previousSample();
}
break;
case 'ArrowRight':
e.preventDefault();
if (e.shiftKey && this.showDock) {
this.scrollDockRight();
} else {
this.nextSample();
}
break;
case 'k':
case 'K':
e.preventDefault();
this.previousSample();
break;
case 'j':
case 'J':
e.preventDefault();
this.nextSample();
break;
case '1':
this.activeTab = 'comparison';
break;
case '2':
this.activeTab = 'diff';
break;
case '3':
this.activeTab = 'improved';
break;
case 'v':
case 'V':
// Toggle dock with V key
if (this.showDock) {
this.hideDockPreview();
} else {
this.showDockPreview();
}
break;
}
});
},
async loadDataset() {
this.loading = true;
this.error = null;
try {
// Validate dataset
await this.api.validateDataset(this.datasetId);
// Get dataset info
const info = await this.api.getDatasetInfo(this.datasetId);
this.datasetConfig = info.defaultConfig;
this.datasetSplit = info.defaultSplit;
// Get total rows
this.totalSamples = await this.api.getTotalRows(
this.datasetId,
this.datasetConfig,
this.datasetSplit
);
// Load first sample
this.currentIndex = 0;
await this.loadSample(0);
} catch (error) {
this.error = error.message;
} finally {
this.loading = false;
}
},
async loadSample(index) {
try {
const data = await this.api.getRow(
this.datasetId,
this.datasetConfig,
this.datasetSplit,
index
);
this.currentSample = data.row;
this.currentIndex = index;
// Update diff when sample changes
this.updateDiff();
// Update URL without triggering navigation
const url = new URL(window.location);
url.searchParams.set('dataset', this.datasetId);
url.searchParams.set('index', index);
window.history.replaceState({}, '', url);
} catch (error) {
this.error = `Failed to load sample: ${error.message}`;
}
},
async nextSample() {
if (this.currentIndex < this.totalSamples - 1) {
await this.loadSample(this.currentIndex + 1);
}
},
async previousSample() {
if (this.currentIndex > 0) {
await this.loadSample(this.currentIndex - 1);
}
},
async jumpToSample() {
const pageNum = parseInt(this.jumpToPage);
if (!isNaN(pageNum) && pageNum >= 1 && pageNum <= this.totalSamples) {
// Convert 1-based page number to 0-based index
await this.loadSample(pageNum - 1);
// Clear the input after jumping
this.jumpToPage = '';
} else {
// Show error or just reset
this.jumpToPage = '';
}
},
getOriginalText() {
if (!this.currentSample) return '';
const columns = this.api.detectColumns(null, this.currentSample);
return this.currentSample[columns.originalText] || 'No original text found';
},
getImprovedText() {
if (!this.currentSample) return '';
const columns = this.api.detectColumns(null, this.currentSample);
return this.currentSample[columns.improvedText] || 'No improved text found';
},
getImageData() {
if (!this.currentSample) return null;
const columns = this.api.detectColumns(null, this.currentSample);
return columns.image ? this.currentSample[columns.image] : null;
},
getImageSrc() {
const imageData = this.getImageData();
return imageData?.src || '';
},
getImageDimensions() {
const imageData = this.getImageData();
if (imageData?.width && imageData?.height) {
return `${imageData.width}×${imageData.height}`;
}
return null;
},
updateDiff() {
const original = this.getOriginalText();
const improved = this.getImprovedText();
// Calculate statistics
this.calculateStatistics(original, improved);
// Use diff utility based on mode
switch(this.diffMode) {
case 'char':
this.diffHtml = createCharacterDiff(original, improved);
break;
case 'word':
this.diffHtml = createWordDiff(original, improved);
break;
case 'line':
this.diffHtml = createLineDiff(original, improved);
break;
}
},
calculateStatistics(original, improved) {
// Calculate similarity
this.similarity = calculateSimilarity(original, improved);
// Character statistics
const charDiff = this.getCharacterDiffStats(original, improved);
this.charStats = charDiff;
// Word statistics
const originalWords = original.split(/\s+/).filter(w => w.length > 0);
const improvedWords = improved.split(/\s+/).filter(w => w.length > 0);
this.wordStats = {
original: originalWords.length,
improved: improvedWords.length
};
},
getCharacterDiffStats(original, improved) {
const dp = computeLCS(original, improved);
const diff = buildDiff(original, improved, dp);
let added = 0;
let removed = 0;
let unchanged = 0;
for (const part of diff) {
if (part.type === 'insert') {
added += part.value.length;
} else if (part.type === 'delete') {
removed += part.value.length;
} else {
unchanged += part.value.length;
}
}
return {
total: original.length,
added: added,
removed: removed,
unchanged: unchanged
};
},
async handleImageError(event) {
// Try to refresh the image URL
console.log('Image failed to load, refreshing URL...');
try {
const data = await this.api.refreshImageUrl(
this.datasetId,
this.datasetConfig,
this.datasetSplit,
this.currentIndex
);
// Update the image source
if (data.row && data.row[this.api.detectColumns(null, data.row).image]?.src) {
event.target.src = data.row[this.api.detectColumns(null, data.row).image].src;
}
} catch (error) {
console.error('Failed to refresh image URL:', error);
// Set a placeholder image
event.target.src = '';
}
},
exportComparison() {
const original = this.getOriginalText();
const improved = this.getImprovedText();
const metadata = {
dataset: this.datasetId,
page: this.currentIndex + 1,
totalPages: this.totalSamples,
exportDate: new Date().toISOString(),
similarity: `${this.similarity}%`,
statistics: {
characters: this.charStats,
words: this.wordStats
}
};
// Create export content
let content = `OCR Text Comparison Export\n`;
content += `==========================\n\n`;
content += `Dataset: ${metadata.dataset}\n`;
content += `Page: ${metadata.page} of ${metadata.totalPages}\n`;
content += `Export Date: ${new Date().toLocaleString()}\n`;
content += `Similarity: ${metadata.similarity}\n`;
content += `Characters: ${metadata.statistics.characters.total} total, `;
content += `${metadata.statistics.characters.added} added, `;
content += `${metadata.statistics.characters.removed} removed\n`;
content += `Words: ${metadata.statistics.words.original}${metadata.statistics.words.improved}\n`;
content += `\n${'='.repeat(50)}\n\n`;
content += `ORIGINAL OCR:\n`;
content += `${'='.repeat(50)}\n`;
content += original;
content += `\n\n${'='.repeat(50)}\n\n`;
content += `IMPROVED OCR:\n`;
content += `${'='.repeat(50)}\n`;
content += improved;
// Download file
const blob = new Blob([content], { type: 'text/plain' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = `ocr-comparison-${this.datasetId.replace('/', '-')}-page-${this.currentIndex + 1}.txt`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
},
// Flow view methods
async toggleFlowView() {
this.showFlowView = !this.showFlowView;
if (this.showFlowView) {
// Reset to center around current page when opening
this.flowStartIndex = Math.max(0, this.currentIndex - Math.floor(this.flowVisibleCount / 2));
await this.loadFlowItems();
}
},
async loadFlowItems() {
// Load thumbnails from flowStartIndex
const startIdx = this.flowStartIndex;
this.flowItems = [];
// Load visible items
for (let i = 0; i < this.flowVisibleCount && (startIdx + i) < this.totalSamples; i++) {
const idx = startIdx + i;
try {
const data = await this.api.getRow(
this.datasetId,
this.datasetConfig,
this.datasetSplit,
idx
);
const columns = this.api.detectColumns(null, data.row);
const imageData = columns.image ? data.row[columns.image] : null;
this.flowItems.push({
index: idx,
imageSrc: imageData?.src || '',
row: data.row
});
} catch (error) {
console.error(`Failed to load flow item ${idx}:`, error);
}
}
},
scrollFlowLeft() {
if (this.flowStartIndex > 0) {
this.flowStartIndex = Math.max(0, this.flowStartIndex - this.flowVisibleCount);
this.loadFlowItems();
}
},
scrollFlowRight() {
if (this.flowStartIndex < this.totalSamples - this.flowVisibleCount) {
this.flowStartIndex = Math.min(
this.totalSamples - this.flowVisibleCount,
this.flowStartIndex + this.flowVisibleCount
);
this.loadFlowItems();
}
},
async jumpToFlowPage(index) {
this.showFlowView = false;
await this.loadSample(index);
},
async handleFlowImageError(event, index) {
// Try to refresh the image URL for flow item
try {
const data = await this.api.refreshImageUrl(
this.datasetId,
this.datasetConfig,
this.datasetSplit,
index
);
if (data.row) {
const columns = this.api.detectColumns(null, data.row);
const imageData = columns.image ? data.row[columns.image] : null;
if (imageData?.src) {
event.target.src = imageData.src;
// Update the flow item
const flowItem = this.flowItems.find(item => item.index === index);
if (flowItem) {
flowItem.imageSrc = imageData.src;
}
}
}
} catch (error) {
console.error('Failed to refresh flow image URL:', error);
}
},
// Dock methods
async showDockPreview() {
// Clear any hide timeout
if (this.dockHideTimeout) {
clearTimeout(this.dockHideTimeout);
this.dockHideTimeout = null;
}
this.showDock = true;
// Center dock around current page
this.dockStartIndex = Math.max(0,
Math.min(
this.currentIndex - Math.floor(this.dockVisibleCount / 2),
this.totalSamples - this.dockVisibleCount
)
);
// Always reload dock items to show current position
await this.loadDockItems();
},
hideDockPreview() {
// Add a small delay to prevent flickering
this.dockHideTimeout = setTimeout(() => {
this.showDock = false;
}, 300);
},
async loadDockItems() {
// Load thumbnails based on dock start index
const endIdx = Math.min(this.totalSamples, this.dockStartIndex + this.dockVisibleCount);
this.dockItems = [];
for (let i = this.dockStartIndex; i < endIdx; i++) {
try {
const data = await this.api.getRow(
this.datasetId,
this.datasetConfig,
this.datasetSplit,
i
);
const columns = this.api.detectColumns(null, data.row);
const imageData = columns.image ? data.row[columns.image] : null;
this.dockItems.push({
index: i,
imageSrc: imageData?.src || '',
row: data.row
});
} catch (error) {
console.error(`Failed to load dock item ${i}:`, error);
}
}
},
async scrollDockLeft() {
if (this.dockStartIndex > 0) {
this.dockStartIndex = Math.max(0, this.dockStartIndex - Math.floor(this.dockVisibleCount / 2));
await this.loadDockItems();
}
},
async scrollDockRight() {
if (this.dockStartIndex < this.totalSamples - this.dockVisibleCount) {
this.dockStartIndex = Math.min(
this.totalSamples - this.dockVisibleCount,
this.dockStartIndex + Math.floor(this.dockVisibleCount / 2)
);
await this.loadDockItems();
}
},
async jumpToDockPage(index) {
this.showDock = false;
await this.loadSample(index);
},
// Watch for diff mode changes
initWatchers() {
this.$watch('diffMode', () => this.updateDiff());
this.$watch('currentSample', () => this.updateDiff());
}
}));
});
// Initialize watchers after Alpine loads
// Once Alpine has finished initialising, call initWatchers() on the
// 'ocrExplorer' store if such a store exists and exposes that method.
// NOTE(review): in this file 'ocrExplorer' is registered with Alpine.data(),
// not Alpine.store(); unless a store of that name is created elsewhere, this
// lookup yields undefined and the call is silently skipped — confirm.
document.addEventListener('alpine:initialized', () => {
  const explorerStore = Alpine.store('ocrExplorer');
  explorerStore?.initWatchers?.();
});