Tonic committed on
Commit
3497964
·
unverified ·
1 Parent(s): 63a03ad

return formatted res

Browse files
Files changed (1) hide show
  1. app.py +31 -3
app.py CHANGED
@@ -132,6 +132,36 @@ def update_inputs(task):
132
  gr.update(visible=True)
133
  ]
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
136
  res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
137
 
@@ -139,9 +169,7 @@ def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
139
  return res, None
140
 
141
  res = res.replace("\\title", "\\title ")
142
- lines = re.split(r'\\\\', res) # Split on double backslashes
143
- formatted_lines = [f"$$ {line.strip()} $$" for line in lines if line.strip()]
144
- formatted_res = "\n".join(formatted_lines)
145
 
146
  if html_content:
147
  encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')
 
132
  gr.update(visible=True)
133
  ]
134
 
135
def parse_latex_output(res):
    r"""Wrap the LaTeX-looking lines of ``res`` in ``$$ ... $$`` delimiters.

    The text is processed line by line. Each line is stripped and blank
    lines are dropped. The remaining lines are handled as follows:

    * Lines that start with ``\begin{tabular}`` or ``\end{tabular}``, and
      every line in between, are wrapped in ``$$ ... $$``.
    * Lines that start with ``\title``, ``\author`` or ``\section`` are
      kept unchanged.
    * Any other line is wrapped when it contains one of the characters
      ``\ { } $ _ ^``; otherwise it is kept unchanged.

    Args:
        res: Text to format. ``None`` or an empty string yields ``''``.

    Returns:
        The processed lines joined with ``'\n'``.
    """
    if not res:
        return ''

    begin_tabular = '\\begin{tabular}'
    end_tabular = '\\end{tabular}'
    plain_prefixes = ('\\title', '\\author', '\\section')
    # Every LaTeX command contains a backslash, so this single character
    # class also covers \hline, \begin and \end; no separate keyword scan
    # is needed.
    latex_markup = re.compile(r'[\\{}$_^]')

    parsed_lines = []
    in_tabular = False

    for raw_line in res.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if in_tabular or line.startswith((begin_tabular, end_tabular)):
            parsed_lines.append(f'$$ {line} $$')
            # Decide the state from whichever delimiter occurs last on the
            # line. This closes the table when \end{tabular} shares a line
            # with \begin{tabular} or trails a row, instead of leaving the
            # flag set and wrapping all following text.
            last_begin = line.rfind(begin_tabular)
            last_end = line.rfind(end_tabular)
            if last_begin != -1 or last_end != -1:
                in_tabular = last_begin > last_end
            continue

        if line.startswith(plain_prefixes):
            parsed_lines.append(line)
        elif latex_markup.search(line):
            parsed_lines.append(f'$$ {line} $$')
        else:
            parsed_lines.append(line)

    return '\n'.join(parsed_lines)
164
+
165
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
166
  res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
167
 
 
169
  return res, None
170
 
171
  res = res.replace("\\title", "\\title ")
172
+ formatted_res = parse_latex_output(res)
 
 
173
 
174
  if html_content:
175
  encoded_html = base64.b64encode(html_content.encode('utf-8')).decode('utf-8')