broadfield-dev committed on
Commit
cf5a0c5
·
verified ·
1 Parent(s): efff37f

Update templates/index.html

Browse files
Files changed (1) hide show
  1. templates/index.html +107 -22
templates/index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="utf-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
6
- <title>PDF to Markdown Converter (Flask)</title>
7
  <style>
8
  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; margin: 0; background-color: #f0f2f5; color: #1c1e21; line-height: 1.5; }
9
  .navbar { background-color: #1877f2; padding: 10px 20px; color: white; text-align: center; }
@@ -13,12 +13,14 @@
13
  label { display: block; margin-top: 15px; margin-bottom: 5px; font-weight: 600; color: #4b4f56; }
14
  input[type="file"], input[type="text"] { width: calc(100% - 22px); padding: 10px; margin-top: 5px; border: 1px solid #dddfe2; border-radius: 6px; font-size: 1em; }
15
  input[type="file"] { padding: 7px; }
16
- input[type="submit"] { background-color: #1877f2; color: white; padding: 10px 20px; border: none; border-radius: 6px; cursor: pointer; margin-top: 25px; font-size: 1.1em; font-weight: bold; }
17
- input[type="submit"]:hover { background-color: #166fe5; }
 
18
  .message { margin-top: 20px; padding: 12px; border-radius: 6px; font-size: 0.95em; }
19
- .status { background-color: #e7f3ff; border: 1px solid #cfe2ff; color: #055160; }
20
  .error { background-color: #f8d7da; border: 1px solid #f5c2c7; color: #842029; }
21
- pre { background-color: #f5f6f7; padding: 15px; border: 1px solid #e0e0e0; border-radius: 6px; white-space: pre-wrap; word-wrap: break-word; margin-top: 20px; font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; font-size: 0.9em; line-height: 1.6; }
 
 
22
  .or-separator { text-align: center; margin: 20px 0; font-weight: bold; color: #606770; }
23
  .form-actions { text-align: center; }
24
  .footer { text-align: center; margin-top: 30px; font-size: 0.85em; color: #606770; }
@@ -26,19 +28,14 @@
26
  </head>
27
  <body>
28
  <div class="navbar">
29
- <h1>PDF to Markdown Converter</h1>
30
  </div>
31
  <div class="container">
32
- <p>Upload a PDF file or provide a URL to convert it to Markdown. Images will be extracted and uploaded to a Hugging Face dataset (requires <code>HF_TOKEN</code> in Space secrets).</p>
33
 
34
- {% if error_message %}
35
- <div class="message error">{{ error_message }}</div>
36
- {% endif %}
37
- {% if status_message and not markdown_output %}
38
- <div class="message status">{{ status_message }}</div>
39
- {% endif %}
40
 
41
- <form method="POST" enctype="multipart/form-data" action="{{ url_for('process_pdf_route') }}">
42
  <div>
43
  <label for="pdf_file">Upload PDF File:</label>
44
  <input type="file" name="pdf_file" id="pdf_file" accept=".pdf">
@@ -49,20 +46,108 @@
49
  <input type="text" name="pdf_url" id="pdf_url" placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf">
50
  </div>
51
  <div class="form-actions">
52
- <input type="submit" value="Convert to Markdown">
53
  </div>
54
  </form>
55
 
56
- {% if markdown_output %}
57
- <h2>Markdown Output:</h2>
58
- {% if status_message %}
59
- <div class="message status">{{ status_message }}</div>
60
- {% endif %}
61
- <pre>{{ markdown_output }}</pre>
62
- {% endif %}
63
  </div>
64
  <div class="footer">
65
  <p>Powered by Flask, Poppler, Tesseract, and Hugging Face.</p>
66
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  </body>
68
  </html>
 
3
  <head>
4
  <meta charset="utf-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
6
+ <title>PDF to Markdown Converter (Streaming)</title>
7
  <style>
8
  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; margin: 0; background-color: #f0f2f5; color: #1c1e21; line-height: 1.5; }
9
  .navbar { background-color: #1877f2; padding: 10px 20px; color: white; text-align: center; }
 
13
  label { display: block; margin-top: 15px; margin-bottom: 5px; font-weight: 600; color: #4b4f56; }
14
  input[type="file"], input[type="text"] { width: calc(100% - 22px); padding: 10px; margin-top: 5px; border: 1px solid #dddfe2; border-radius: 6px; font-size: 1em; }
15
  input[type="file"] { padding: 7px; }
16
+ #submitBtn { background-color: #1877f2; color: white; padding: 10px 20px; border: none; border-radius: 6px; cursor: pointer; margin-top: 25px; font-size: 1.1em; font-weight: bold; }
17
+ #submitBtn:hover { background-color: #166fe5; }
18
+ #submitBtn:disabled { background-color: #a0a0a0; cursor: not-allowed; }
19
  .message { margin-top: 20px; padding: 12px; border-radius: 6px; font-size: 0.95em; }
 
20
  .error { background-color: #f8d7da; border: 1px solid #f5c2c7; color: #842029; }
21
+ #statusArea { background-color: #e7f3ff; border: 1px solid #cfe2ff; color: #055160; margin-top: 20px; padding: 10px; min-height: 50px; border-radius: 6px; }
22
+ #statusArea p { margin: 5px 0; }
23
+ #markdownOutput { background-color: #f5f6f7; padding: 15px; border: 1px solid #e0e0e0; border-radius: 6px; white-space: pre-wrap; word-wrap: break-word; margin-top: 20px; font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; font-size: 0.9em; line-height: 1.6; min-height: 100px; }
24
  .or-separator { text-align: center; margin: 20px 0; font-weight: bold; color: #606770; }
25
  .form-actions { text-align: center; }
26
  .footer { text-align: center; margin-top: 30px; font-size: 0.85em; color: #606770; }
 
28
  </head>
29
  <body>
30
  <div class="navbar">
31
+ <h1>PDF to Markdown Converter (Streaming)</h1>
32
  </div>
33
  <div class="container">
34
+ <p>Upload a PDF file or provide a URL to convert it to Markdown. Progress will be streamed.</p>
35
 
36
+ <div id="globalError" class="message error" style="display:none;"></div>
 
 
 
 
 
37
 
38
+ <form id="pdfForm">
39
  <div>
40
  <label for="pdf_file">Upload PDF File:</label>
41
  <input type="file" name="pdf_file" id="pdf_file" accept=".pdf">
 
46
  <input type="text" name="pdf_url" id="pdf_url" placeholder="e.g., https://arxiv.org/pdf/1706.03762.pdf">
47
  </div>
48
  <div class="form-actions">
49
+ <button type="button" id="submitBtn">Convert to Markdown</button>
50
  </div>
51
  </form>
52
 
53
+ <h2>Processing Status:</h2>
54
+ <div id="statusArea">
55
+ <p>Waiting for input...</p>
56
+ </div>
57
+
58
+ <h2>Markdown Output:</h2>
59
+ <pre id="markdownOutput">Output will appear here...</pre>
60
  </div>
61
  <div class="footer">
62
  <p>Powered by Flask, Poppler, Tesseract, and Hugging Face.</p>
63
  </div>
64
+
<script>
  // Streaming client for the PDF-to-Markdown form.
  //
  // Posts the form to the streaming endpoint and renders the response as it
  // arrives. The response body is read as newline-delimited JSON; each line is
  // one event object. Event types handled here:
  //   {"type": "status",           "message": "..."}  -> appended to #statusArea
  //   {"type": "final_status",     "message": "..."}  -> appended to #statusArea (bold)
  //   {"type": "markdown_chunk",   "content": "..."}  -> appended to #markdownOutput
  //   {"type": "image_md",         "content": "..."}  -> appended to #markdownOutput
  //   {"type": "markdown_replace", "content": "..."}  -> replaces #markdownOutput
  //   {"type": "error",            "message": "..."}  -> shown in #statusArea and #globalError
  const form = document.getElementById('pdfForm');
  const submitBtn = document.getElementById('submitBtn');
  const statusArea = document.getElementById('statusArea');
  const markdownOutput = document.getElementById('markdownOutput');
  const globalError = document.getElementById('globalError');

  // Append one paragraph to the status area and keep it scrolled to the end.
  // Text is always assigned through textContent so that server-supplied
  // messages are never parsed as HTML.
  //   text    - message to display
  //   options - { bold: wrap the text in <strong>, color: CSS color for the line }
  function appendStatus(text, options = {}) {
    const paragraph = document.createElement('p');
    if (options.color) {
      paragraph.style.color = options.color;
    }
    const target = options.bold
      ? paragraph.appendChild(document.createElement('strong'))
      : paragraph;
    target.textContent = text;
    statusArea.appendChild(paragraph);
    statusArea.scrollTop = statusArea.scrollHeight;
  }

  // Reveal the page-level error banner with the given text.
  function showGlobalError(text) {
    globalError.textContent = text;
    globalError.style.display = 'block';
  }

  // Parse one complete line of the stream and apply it to the page.
  // Blank lines are ignored; lines that are not valid event JSON are logged
  // and skipped so one bad line does not abort the whole stream.
  function handleEventLine(line) {
    if (line.trim() === '') return;
    try {
      const data = JSON.parse(line);
      switch (data.type) {
        case 'status':
          appendStatus(data.message);
          break;
        case 'markdown_chunk':
        case 'image_md':
          markdownOutput.textContent += data.content;
          break;
        case 'markdown_replace':
          // Used for an initial title or a full rewrite of the output.
          markdownOutput.textContent = data.content;
          break;
        case 'error':
          appendStatus('ERROR: ' + data.message, { color: 'red' });
          showGlobalError('An error occurred: ' + data.message);
          break;
        case 'final_status':
          appendStatus(data.message, { bold: true });
          break;
      }
    } catch (e) {
      console.warn('Failed to parse JSON chunk:', line, e);
    }
  }

  // Run one conversion: reset the UI, POST the form, and consume the stream.
  // The submit button doubles as the "busy" flag, so a second trigger while a
  // conversion is running is ignored.
  async function runConversion() {
    if (submitBtn.disabled) return;
    submitBtn.disabled = true;
    statusArea.innerHTML = '<p>Starting processing...</p>';
    markdownOutput.textContent = 'Processing...';
    globalError.style.display = 'none';

    try {
      const response = await fetch("{{ url_for('process_pdf_stream') }}", {
        method: 'POST',
        body: new FormData(form),
      });

      if (!response.ok) {
        // HTTP-level failure before any streaming started (e.g. 400 or 500).
        const errorText = await response.text();
        throw new Error(`Server error: ${response.status} ${response.statusText}. ${errorText}`);
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      markdownOutput.textContent = ''; // Clear the "Processing..." placeholder.

      // Network chunks do not respect line boundaries: a JSON event may be
      // split across two reads. Keep the unterminated tail in `pending` and
      // only parse lines that are known to be complete.
      let pending = '';
      while (true) {
        const { value, done } = await reader.read();
        // On completion, flush any bytes the decoder is still holding.
        pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

        const lines = pending.split('\n');
        // While streaming, the last element is an incomplete line; once the
        // stream has ended, whatever is left is the final line.
        pending = done ? '' : lines.pop();
        lines.forEach(handleEventLine);

        if (done) {
          appendStatus('Processing complete.', { bold: true });
          break;
        }
      }
    } catch (error) {
      console.error('Fetch error:', error);
      // error.message can contain the raw server response body, so build the
      // nodes by hand instead of interpolating it into innerHTML.
      statusArea.textContent = '';
      const paragraph = document.createElement('p');
      paragraph.style.color = 'red';
      const label = document.createElement('strong');
      label.textContent = 'Processing failed:';
      paragraph.appendChild(label);
      paragraph.appendChild(document.createTextNode(' ' + error.message));
      statusArea.appendChild(paragraph);

      markdownOutput.textContent = 'Error occurred.';
      showGlobalError(`An error occurred during the request: ${error.message}`);
    } finally {
      submitBtn.disabled = false;
    }
  }

  submitBtn.addEventListener('click', runConversion);

  // Pressing Enter in the URL field triggers the browser's implicit form
  // submission, which would otherwise reload the page. Route it to the same
  // streaming conversion instead.
  form.addEventListener('submit', function (event) {
    event.preventDefault();
    runConversion();
  });
</script>
152
  </body>
153
  </html>