Spaces:
Sleeping
Sleeping
Update templates/index.html
Browse files - templates/index.html +107 -22
templates/index.html
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
<head>
|
4 |
<meta charset="utf-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
6 |
-
<title>PDF to Markdown Converter (
|
7 |
<style>
|
8 |
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; margin: 0; background-color: #f0f2f5; color: #1c1e21; line-height: 1.5; }
|
9 |
.navbar { background-color: #1877f2; padding: 10px 20px; color: white; text-align: center; }
|
@@ -13,12 +13,14 @@
|
|
13 |
label { display: block; margin-top: 15px; margin-bottom: 5px; font-weight: 600; color: #4b4f56; }
|
14 |
input[type="file"], input[type="text"] { width: calc(100% - 22px); padding: 10px; margin-top: 5px; border: 1px solid #dddfe2; border-radius: 6px; font-size: 1em; }
|
15 |
input[type="file"] { padding: 7px; }
|
16 |
-
|
17 |
-
|
|
|
18 |
.message { margin-top: 20px; padding: 12px; border-radius: 6px; font-size: 0.95em; }
|
19 |
-
.status { background-color: #e7f3ff; border: 1px solid #cfe2ff; color: #055160; }
|
20 |
.error { background-color: #f8d7da; border: 1px solid #f5c2c7; color: #842029; }
|
21 |
-
|
|
|
|
|
22 |
.or-separator { text-align: center; margin: 20px 0; font-weight: bold; color: #606770; }
|
23 |
.form-actions { text-align: center; }
|
24 |
.footer { text-align: center; margin-top: 30px; font-size: 0.85em; color: #606770; }
|
@@ -26,19 +28,14 @@
|
|
26 |
</head>
|
27 |
<body>
|
28 |
<div class="navbar">
|
29 |
-
<h1>PDF to Markdown Converter</h1>
|
30 |
</div>
|
31 |
<div class="container">
|
32 |
-
<p>Upload a PDF file or provide a URL to convert it to Markdown.
|
33 |
|
34 |
-
|
35 |
-
<div class="message error">{{ error_message }}</div>
|
36 |
-
{% endif %}
|
37 |
-
{% if status_message and not markdown_output %}
|
38 |
-
<div class="message status">{{ status_message }}</div>
|
39 |
-
{% endif %}
|
40 |
|
41 |
-
<form
|
42 |
<div>
|
43 |
<label for="pdf_file">Upload PDF File:</label>
|
44 |
<input type="file" name="pdf_file" id="pdf_file" accept=".pdf">
|
@@ -49,20 +46,108 @@
|
|
49 |
<input type="text" name="pdf_url" id="pdf_url" placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf">
|
50 |
</div>
|
51 |
<div class="form-actions">
|
52 |
-
<
|
53 |
</div>
|
54 |
</form>
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
</div>
|
64 |
<div class="footer">
|
65 |
<p>Powered by Flask, Poppler, Tesseract, and Hugging Face.</p>
|
66 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
</body>
|
68 |
</html>
|
|
|
3 |
<head>
|
4 |
<meta charset="utf-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
6 |
+
<title>PDF to Markdown Converter (Streaming)</title>
|
7 |
<style>
|
8 |
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; margin: 0; background-color: #f0f2f5; color: #1c1e21; line-height: 1.5; }
|
9 |
.navbar { background-color: #1877f2; padding: 10px 20px; color: white; text-align: center; }
|
|
|
13 |
label { display: block; margin-top: 15px; margin-bottom: 5px; font-weight: 600; color: #4b4f56; }
|
14 |
input[type="file"], input[type="text"] { width: calc(100% - 22px); padding: 10px; margin-top: 5px; border: 1px solid #dddfe2; border-radius: 6px; font-size: 1em; }
|
15 |
input[type="file"] { padding: 7px; }
|
16 |
+
#submitBtn { background-color: #1877f2; color: white; padding: 10px 20px; border: none; border-radius: 6px; cursor: pointer; margin-top: 25px; font-size: 1.1em; font-weight: bold; }
|
17 |
+
#submitBtn:hover { background-color: #166fe5; }
|
18 |
+
#submitBtn:disabled { background-color: #a0a0a0; cursor: not-allowed; }
|
19 |
.message { margin-top: 20px; padding: 12px; border-radius: 6px; font-size: 0.95em; }
|
|
|
20 |
.error { background-color: #f8d7da; border: 1px solid #f5c2c7; color: #842029; }
|
21 |
+
#statusArea { background-color: #e7f3ff; border: 1px solid #cfe2ff; color: #055160; margin-top: 20px; padding: 10px; min-height: 50px; border-radius: 6px; }
|
22 |
+
#statusArea p { margin: 5px 0; }
|
23 |
+
#markdownOutput { background-color: #f5f6f7; padding: 15px; border: 1px solid #e0e0e0; border-radius: 6px; white-space: pre-wrap; word-wrap: break-word; margin-top: 20px; font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; font-size: 0.9em; line-height: 1.6; min-height: 100px; }
|
24 |
.or-separator { text-align: center; margin: 20px 0; font-weight: bold; color: #606770; }
|
25 |
.form-actions { text-align: center; }
|
26 |
.footer { text-align: center; margin-top: 30px; font-size: 0.85em; color: #606770; }
|
|
|
28 |
</head>
|
29 |
<body>
|
30 |
<div class="navbar">
|
31 |
+
<h1>PDF to Markdown Converter (Streaming)</h1>
|
32 |
</div>
|
33 |
<div class="container">
|
34 |
+
<p>Upload a PDF file or provide a URL to convert it to Markdown. Progress will be streamed.</p>
|
35 |
|
36 |
+
<div id="globalError" class="message error" style="display:none;"></div>
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
<form id="pdfForm">
|
39 |
<div>
|
40 |
<label for="pdf_file">Upload PDF File:</label>
|
41 |
<input type="file" name="pdf_file" id="pdf_file" accept=".pdf">
|
|
|
46 |
<input type="text" name="pdf_url" id="pdf_url" placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf">
|
47 |
</div>
|
48 |
<div class="form-actions">
|
49 |
+
<button type="button" id="submitBtn">Convert to Markdown</button>
|
50 |
</div>
|
51 |
</form>
|
52 |
|
53 |
+
<h2>Processing Status:</h2>
|
54 |
+
<div id="statusArea">
|
55 |
+
<p>Waiting for input...</p>
|
56 |
+
</div>
|
57 |
+
|
58 |
+
<h2>Markdown Output:</h2>
|
59 |
+
<pre id="markdownOutput">Output will appear here...</pre>
|
60 |
</div>
|
61 |
<div class="footer">
|
62 |
<p>Powered by Flask, Poppler, Tesseract, and Hugging Face.</p>
|
63 |
</div>
|
64 |
+
|
65 |
+
<script>
|
66 |
+
// Client for the streaming PDF-to-Markdown endpoint.
//
// The form is posted with fetch() and the response body is read incrementally.
// The response is treated as newline-delimited JSON, one event object per line.
// Event types handled below: "status", "markdown_chunk", "markdown_replace",
// "image_md", "error" and "final_status".
const form = document.getElementById('pdfForm');
const submitBtn = document.getElementById('submitBtn');
const statusArea = document.getElementById('statusArea');
const markdownOutput = document.getElementById('markdownOutput');
const globalError = document.getElementById('globalError');

/**
 * Build a <p> element for the status area without going through innerHTML,
 * so text received from the server is never parsed as markup.
 *
 * @param {string} text - Message text for the paragraph.
 * @param {{color?: string, bold?: boolean, boldLabel?: string}} [options]
 *   color: inline text colour; bold: wrap the text in <strong>;
 *   boldLabel: a <strong> prefix followed by a space, placed before the text.
 * @returns {HTMLParagraphElement} The paragraph, not yet attached to the DOM.
 */
const buildStatusLine = (text, { color = '', bold = false, boldLabel = '' } = {}) => {
    const paragraph = document.createElement('p');
    if (color) {
        paragraph.style.color = color;
    }
    if (boldLabel) {
        const label = document.createElement('strong');
        label.textContent = boldLabel;
        paragraph.appendChild(label);
        paragraph.appendChild(document.createTextNode(' '));
    }
    if (bold) {
        const strong = document.createElement('strong');
        strong.textContent = text;
        paragraph.appendChild(strong);
    } else {
        paragraph.appendChild(document.createTextNode(text));
    }
    return paragraph;
};

/**
 * Apply one decoded event object to the page.
 *
 * @param {{type: string, message?: string, content?: string}} data
 */
const handleStreamEvent = (data) => {
    switch (data.type) {
        case 'status':
            statusArea.appendChild(buildStatusLine(data.message));
            statusArea.scrollTop = statusArea.scrollHeight; // Auto-scroll
            break;
        case 'markdown_chunk':
        case 'image_md':
            markdownOutput.textContent += data.content;
            break;
        case 'markdown_replace':
            markdownOutput.textContent = data.content; // For initial title or full rewrite
            break;
        case 'error':
            statusArea.appendChild(buildStatusLine('ERROR: ' + data.message, { color: 'red' }));
            globalError.textContent = 'An error occurred: ' + data.message;
            globalError.style.display = 'block';
            break;
        case 'final_status':
            statusArea.appendChild(buildStatusLine(data.message, { bold: true }));
            break;
        default:
            // Unknown event types are ignored.
            break;
    }
};

/**
 * Parse one complete line of the stream as JSON and dispatch it.
 * Blank lines are skipped; unparsable lines are logged and skipped.
 *
 * @param {string} line - One line of the response, without its newline.
 */
const handleStreamLine = (line) => {
    if (line.trim() === '') return;
    try {
        handleStreamEvent(JSON.parse(line));
    } catch (e) {
        console.warn('Failed to parse JSON chunk:', line, e);
    }
};

submitBtn.addEventListener('click', async function(event) {
    event.preventDefault();
    submitBtn.disabled = true;
    statusArea.innerHTML = '<p>Starting processing...</p>';
    markdownOutput.textContent = 'Processing...';
    globalError.style.display = 'none';

    const formData = new FormData(form);

    try {
        const response = await fetch("{{ url_for('process_pdf_stream') }}", {
            method: 'POST',
            body: formData,
        });

        if (!response.ok) {
            // Handle initial HTTP errors before streaming starts (e.g., 400, 500 from Flask before yield)
            const errorText = await response.text();
            throw new Error(`Server error: ${response.status} ${response.statusText}. ${errorText}`);
        }

        // Process the streamed response
        const reader = response.body.getReader();
        const decoder = new TextDecoder();
        markdownOutput.textContent = ''; // Clear previous output

        // Network chunks do not align with line boundaries, so the trailing
        // partial line of each chunk is carried over and completed by the next.
        let pending = '';

        while (true) {
            const { value, done } = await reader.read();

            // On completion, decode() with no arguments flushes any bytes of a
            // multi-byte character that the decoder is still holding.
            pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

            const lines = pending.split('\n');
            // While streaming, the last element is an incomplete line (or '');
            // at end of stream everything left over is processed.
            pending = done ? '' : lines.pop();
            lines.forEach(handleStreamLine);

            if (done) {
                statusArea.appendChild(buildStatusLine('Processing complete.', { bold: true }));
                break;
            }
        }

    } catch (error) {
        console.error('Fetch error:', error);
        // error.message can contain the raw server response body, so it is
        // inserted as text rather than as HTML.
        statusArea.textContent = '';
        statusArea.appendChild(
            buildStatusLine(error.message, { color: 'red', boldLabel: 'Processing failed:' })
        );
        markdownOutput.textContent = 'Error occurred.';
        globalError.textContent = `An error occurred during the request: ${error.message}`;
        globalError.style.display = 'block';
    } finally {
        submitBtn.disabled = false;
    }
});
|
151 |
+
</script>
|
152 |
</body>
|
153 |
</html>
|