pdf2markdown / templates /index.html
broadfield-dev's picture
Update templates/index.html
cf5a0c5 verified
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>PDF to Markdown Converter (Streaming)</title>
<style>
/* ---- Page chrome ------------------------------------------------------ */
body {
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  margin: 0;
  background-color: #f0f2f5;
  color: #1c1e21;
  line-height: 1.5;
}
.navbar {
  background-color: #1877f2;
  padding: 10px 20px;
  color: white;
  text-align: center;
}
.navbar h1 {
  margin: 0;
  font-size: 1.8em;
}
.container {
  max-width: 800px;
  margin: 20px auto;
  background-color: #fff;
  padding: 25px;
  border-radius: 8px;
  box-shadow: 0 2px 4px rgba(0,0,0,0.1), 0 8px 16px rgba(0,0,0,0.1);
}
p {
  margin-bottom: 1em;
}

/* ---- Form controls ---------------------------------------------------- */
label {
  display: block;
  margin-top: 15px;
  margin-bottom: 5px;
  font-weight: 600;
  color: #4b4f56;
}
/* 22px = 2 x 10px horizontal padding + 2 x 1px border (content-box sizing). */
input[type="file"],
input[type="text"] {
  width: calc(100% - 22px);
  padding: 10px;
  margin-top: 5px;
  border: 1px solid #dddfe2;
  border-radius: 6px;
  font-size: 1em;
}
input[type="file"] {
  padding: 7px;
}
#submitBtn {
  background-color: #1877f2;
  color: white;
  padding: 10px 20px;
  border: none;
  border-radius: 6px;
  cursor: pointer;
  margin-top: 25px;
  font-size: 1.1em;
  font-weight: bold;
}
#submitBtn:hover {
  background-color: #166fe5;
}
/* Declared after :hover so the disabled look wins while the pointer is over it. */
#submitBtn:disabled {
  background-color: #a0a0a0;
  cursor: not-allowed;
}

/* ---- Feedback areas --------------------------------------------------- */
.message {
  margin-top: 20px;
  padding: 12px;
  border-radius: 6px;
  font-size: 0.95em;
}
.error {
  background-color: #f8d7da;
  border: 1px solid #f5c2c7;
  color: #842029;
}
#statusArea {
  background-color: #e7f3ff;
  border: 1px solid #cfe2ff;
  color: #055160;
  margin-top: 20px;
  padding: 10px;
  min-height: 50px;
  /* Cap the log height and let it scroll; without this the box grows without
     bound and the script's scrollTop-based auto-scroll has nothing to scroll. */
  max-height: 300px;
  overflow-y: auto;
  border-radius: 6px;
}
#statusArea p {
  margin: 5px 0;
}
#markdownOutput {
  background-color: #f5f6f7;
  padding: 15px;
  border: 1px solid #e0e0e0;
  border-radius: 6px;
  white-space: pre-wrap;
  /* Standard property name; "word-wrap" is only a legacy alias for it. */
  overflow-wrap: break-word;
  margin-top: 20px;
  font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace;
  font-size: 0.9em;
  line-height: 1.6;
  min-height: 100px;
}

/* ---- Layout helpers --------------------------------------------------- */
.or-separator {
  text-align: center;
  margin: 20px 0;
  font-weight: bold;
  color: #606770;
}
.form-actions {
  text-align: center;
}
.footer {
  text-align: center;
  margin-top: 30px;
  font-size: 0.85em;
  color: #606770;
}
</style>
</head>
<body>
<!-- Page banner. Classes and ids are unchanged so the stylesheet and script
     hooks keep matching; only the element types are more meaningful. -->
<header class="navbar">
  <h1>PDF to Markdown Converter (Streaming)</h1>
</header>
<main class="container">
  <p>Upload a PDF file or provide a URL to convert it to Markdown. Progress will be streamed.</p>
  <!-- Error banner: hidden until the script sets display to block.
       role="alert" makes assistive technology announce the text once it appears. -->
  <div id="globalError" class="message error" role="alert" style="display:none;"></div>
  <form id="pdfForm">
    <div>
      <label for="pdf_file">Upload PDF File:</label>
      <input type="file" name="pdf_file" id="pdf_file" accept=".pdf,application/pdf">
    </div>
    <div class="or-separator">OR</div>
    <div>
      <label for="pdf_url">Enter PDF URL:</label>
      <!-- Stays type="text" because the stylesheet targets input[type="text"];
           inputmode gives touch devices a URL keyboard without changing that. -->
      <input type="text" name="pdf_url" id="pdf_url" inputmode="url" spellcheck="false" placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf">
    </div>
    <div class="form-actions">
      <!-- type="submit" makes this the form's default button, so pressing Enter
           in the URL field fires this button's click handler (which cancels the
           native submission) instead of reloading the page with a GET request. -->
      <button type="submit" id="submitBtn">Convert to Markdown</button>
    </div>
  </form>
  <h2>Processing Status:</h2>
  <!-- Live region: progress lines appended by the script are announced politely. -->
  <div id="statusArea" role="status" aria-live="polite">
    <p>Waiting for input...</p>
  </div>
  <h2>Markdown Output:</h2>
  <pre id="markdownOutput">Output will appear here...</pre>
</main>
<footer class="footer">
  <p>Powered by Flask, Poppler, Tesseract, and Hugging Face.</p>
</footer>
<script>
// Handles to the page elements this script reads and updates.
const form = document.getElementById('pdfForm');
const submitBtn = document.getElementById('submitBtn');
const statusArea = document.getElementById('statusArea');
const markdownOutput = document.getElementById('markdownOutput');
const globalError = document.getElementById('globalError');

/**
 * Append one paragraph to the status log and scroll the log to its end.
 * The text is written with textContent, so server-supplied messages are
 * never parsed as HTML.
 *
 * @param {string} message  Text to display.
 * @param {object} [options]  Optional styling: `bold` (boolean) wraps the
 *     text in <strong>; `color` (string) sets the paragraph's text colour.
 */
function appendStatus(message, { bold = false, color = '' } = {}) {
  const p = document.createElement('p');
  if (color) {
    p.style.color = color;
  }
  if (bold) {
    const strong = document.createElement('strong');
    strong.textContent = message;
    p.appendChild(strong);
  } else {
    p.textContent = message;
  }
  statusArea.appendChild(p);
  statusArea.scrollTop = statusArea.scrollHeight; // Auto-scroll
}

/**
 * Show the error banner above the form with the given plain-text message.
 *
 * @param {string} text  Message to display.
 */
function showGlobalError(text) {
  globalError.textContent = text;
  globalError.style.display = 'block';
}

/**
 * Handle one complete line of the streamed response.
 * Each non-blank line is expected to be a JSON object with a `type` of
 * status, markdown_chunk, markdown_replace, image_md, error or final_status.
 * Blank lines and unknown types are ignored; unparsable lines are logged.
 *
 * @param {string} line  One newline-delimited record from the stream.
 */
function handleStreamLine(line) {
  if (line.trim() === '') return;

  let data;
  try {
    data = JSON.parse(line);
  } catch (e) {
    console.warn('Failed to parse JSON chunk:', line, e);
    return;
  }
  // Valid JSON that is not an object (null, a number, ...) carries no `type`.
  if (data === null || typeof data !== 'object') {
    console.warn('Ignoring non-object stream message:', line);
    return;
  }

  switch (data.type) {
    case 'status':
      appendStatus(data.message);
      break;
    case 'markdown_chunk':
    case 'image_md':
      markdownOutput.textContent += data.content;
      break;
    case 'markdown_replace':
      markdownOutput.textContent = data.content; // For initial title or full rewrite
      break;
    case 'error':
      appendStatus('ERROR: ' + data.message, { color: 'red' });
      showGlobalError('An error occurred: ' + data.message);
      break;
    case 'final_status':
      // Built with DOM nodes rather than innerHTML so the message cannot inject markup.
      appendStatus(data.message, { bold: true });
      break;
    default:
      break;
  }
}

/**
 * Post the form to the streaming endpoint and render the newline-delimited
 * JSON response as it arrives. Used for both the button click and the form's
 * submit event; the default action is always cancelled so the page never
 * navigates away.
 *
 * @param {Event} event  The click or submit event that started the request.
 */
async function handleConvert(event) {
  event.preventDefault();
  // A submit event (Enter key) can arrive while a conversion is running;
  // the disabled button doubles as the "busy" flag.
  if (submitBtn.disabled) return;

  submitBtn.disabled = true;
  statusArea.innerHTML = '<p>Starting processing...</p>';
  markdownOutput.textContent = 'Processing...';
  globalError.style.display = 'none';

  const formData = new FormData(form);

  try {
    const response = await fetch("{{ url_for('process_pdf_stream') }}", {
      method: 'POST',
      body: formData,
    });

    if (!response.ok) {
      // Handle initial HTTP errors before streaming starts (e.g., 400, 500 from Flask before yield)
      const errorText = await response.text();
      throw new Error(`Server error: ${response.status} ${response.statusText}. ${errorText}`);
    }

    // Process the streamed response
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    markdownOutput.textContent = ''; // Clear previous output

    // Network chunks do not respect line boundaries: hold back the trailing
    // partial line and prepend it to the next chunk so no record is split.
    let pending = '';
    while (true) {
      const { value, done } = await reader.read();
      // On completion, decode() with no arguments flushes any bytes the
      // decoder buffered from an incomplete multi-byte sequence.
      pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

      const lines = pending.split('\n');
      // The last piece is incomplete unless the stream has ended.
      pending = done ? '' : lines.pop();
      lines.forEach((line) => handleStreamLine(line));

      if (done) {
        appendStatus('Processing complete.', { bold: true });
        break;
      }
    }
  } catch (error) {
    console.error('Fetch error:', error);

    // error.message may contain the raw server response body, so it is
    // inserted as text rather than interpolated into innerHTML.
    const p = document.createElement('p');
    p.style.color = 'red';
    const strong = document.createElement('strong');
    strong.textContent = 'Processing failed:';
    p.appendChild(strong);
    p.appendChild(document.createTextNode(' ' + error.message));
    statusArea.textContent = '';
    statusArea.appendChild(p);

    markdownOutput.textContent = 'Error occurred.';
    showGlobalError(`An error occurred during the request: ${error.message}`);
  } finally {
    submitBtn.disabled = false;
  }
}

submitBtn.addEventListener('click', handleConvert);
// Pressing Enter in the URL field submits the form without clicking the
// button; route that through the same handler instead of reloading the page.
form.addEventListener('submit', handleConvert);
</script>
</body>
</html>