Tonic committed on
Commit
852d108
·
1 Parent(s): aaa7ccd

Delete ocr_app.py

Browse files
Files changed (1) hide show
  1. ocr_app.py +0 -167
ocr_app.py DELETED
@@ -1,167 +0,0 @@
1
- import io
2
-
3
- import pandas as pd
4
- import streamlit as st
5
- from streamlit_drawable_canvas import st_canvas
6
- import hashlib
7
- import pypdfium2
8
-
9
- from texify.inference import batch_inference
10
- from texify.model.model import load_model
11
- from texify.model.processor import load_processor
12
- from texify.settings import settings
13
- import subprocess
14
- import re
15
- from PIL import Image
16
-
17
# Upper bound on the canvas width; get_image_size() scales wider images down to this width.
MAX_WIDTH = 1000
18
-
19
-
20
def replace_katex_invalid(string):
    r"""Rewrite LaTeX so that constructs KaTeX cannot render are removed.

    Two substitutions are applied in order:

    * every ``\tag{...}`` is deleted;
    * ``\Big{...}`` and ``\big{...}`` are replaced by the text between the braces.

    Returns the rewritten string.
    """
    # (pattern, replacement) pairs, applied in this order.
    substitutions = (
        (r'\\tag\{.*?\}', ''),
        (r'\\Big\{(.*?)\}|\\big\{(.*?)\}', r'\1\2'),
    )
    cleaned = string
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
25
-
26
@st.cache_resource()
def load_model_cached():
    """Return the result of ``load_model()``, wrapped in ``st.cache_resource``."""
    texify_model = load_model()
    return texify_model
29
-
30
-
31
@st.cache_resource()
def load_processor_cached():
    """Return the result of ``load_processor()``, wrapped in ``st.cache_resource``."""
    texify_processor = load_processor()
    return texify_processor
34
-
35
-
36
@st.cache_data()
def infer_image(pil_image, bbox, temperature):
    """Run inference on one cropped region of an image.

    Args:
        pil_image: Image to crop from; must provide ``crop``.
        bbox: Box handed unchanged to ``pil_image.crop``.
        temperature: Forwarded to ``batch_inference`` as ``temperature``.

    Returns:
        The first element of the list returned by ``batch_inference``.
    """
    region = pil_image.crop(bbox)
    # ``model`` and ``processor`` are module-level globals set by the script body.
    outputs = batch_inference(
        [region],
        model,
        processor,
        temperature=temperature,
    )
    return outputs[0]
41
-
42
-
43
def open_pdf(pdf_file):
    """Open an uploaded PDF as a ``pypdfium2.PdfDocument``.

    The bytes returned by ``pdf_file.getvalue()`` are wrapped in an in-memory
    ``io.BytesIO`` stream, which is handed to pypdfium2.
    """
    pdf_bytes = pdf_file.getvalue()
    return pypdfium2.PdfDocument(io.BytesIO(pdf_bytes))
46
-
47
-
48
@st.cache_data()
def get_page_image(pdf_file, page_num, dpi=96):
    """Render a single PDF page and return it as an RGB image.

    Args:
        pdf_file: Uploaded PDF; opened through ``open_pdf``.
        page_num: Page number; ``page_num - 1`` is used as the page index.
        dpi: Render resolution; the scale passed to the renderer is ``dpi / 72``.

    Returns:
        The rendered page converted to ``"RGB"``.
    """
    document = open_pdf(pdf_file)
    zero_based_index = page_num - 1
    rendered_pages = list(
        document.render(
            pypdfium2.PdfBitmap.to_pil,
            page_indices=[zero_based_index],
            scale=dpi / 72,
        )
    )
    # Only one index was requested, so the page of interest is the first item.
    return rendered_pages[0].convert("RGB")
59
-
60
-
61
@st.cache_data()
def get_uploaded_image(in_file):
    """Open the uploaded image file with PIL and return it converted to RGB."""
    uploaded = Image.open(in_file)
    return uploaded.convert("RGB")
64
-
65
-
66
@st.cache_data()
def page_count(pdf_file):
    """Return ``len()`` of the document that ``open_pdf`` builds from ``pdf_file``."""
    return len(open_pdf(pdf_file))
70
-
71
-
72
def get_canvas_hash(pil_image):
    """Return the hexadecimal MD5 digest of ``pil_image.tobytes()``."""
    digest = hashlib.md5()
    digest.update(pil_image.tobytes())
    return digest.hexdigest()
74
-
75
-
76
@st.cache_data()
def get_image_size(pil_image):
    """Return the ``(height, width)`` pair used to size the canvas.

    * ``None`` yields the fixed pair ``(800, 600)``.
    * An image no wider than ``MAX_WIDTH`` keeps its own dimensions.
    * A wider image is scaled by ``MAX_WIDTH / width``; the height is
      truncated to an int and the width becomes ``MAX_WIDTH``.
    """
    if pil_image is None:
        return 800, 600

    if pil_image.width <= MAX_WIDTH:
        return pil_image.height, pil_image.width

    shrink = MAX_WIDTH / pil_image.width
    return int(pil_image.height * shrink), MAX_WIDTH
86
-
87
-
88
# ---------------------------------------------------------------------------
# Streamlit script body: upload a PDF/image, draw boxes on a canvas (or OCR the
# whole image), run inference on each box and show the results in the right column.
# ---------------------------------------------------------------------------
st.set_page_config(layout="wide")

top_message = """### Texify

After the model loads, upload an image or a pdf, then draw a box around the equation or text you want to OCR by clicking and dragging. Texify will convert it to Markdown with LaTeX math on the right.

If you have already cropped your image, select "OCR image" in the sidebar instead.
"""

st.markdown(top_message)
col1, col2 = st.columns([.7, .3])

# Module-level globals read by infer_image().
model = load_model_cached()
processor = load_processor_cached()

in_file = st.sidebar.file_uploader("PDF file or image:", type=["pdf", "png", "jpg", "jpeg", "gif", "webp"])
if in_file is None:
    st.stop()

filetype = in_file.type
whole_image = False
if "pdf" in filetype:
    # Keep the count under its own name so the page_count() helper is not shadowed.
    total_pages = page_count(in_file)
    page_number = st.sidebar.number_input(f"Page number out of {total_pages}:", min_value=1, value=1, max_value=total_pages)

    pil_image = get_page_image(in_file, page_number)
else:
    pil_image = get_uploaded_image(in_file)
    whole_image = st.sidebar.button("OCR image")

temperature = st.sidebar.slider("Generation temperature:", min_value=0.0, max_value=1.0, value=0.0, step=0.05)

# Keying the canvas on the image content gives each distinct image its own widget key.
canvas_hash = get_canvas_hash(pil_image) if pil_image else "canvas"
canvas_height, canvas_width = get_image_size(pil_image)

with col1:
    # Create a canvas component
    canvas_result = st_canvas(
        fill_color="rgba(255, 165, 0, 0.1)",  # Fixed fill color with some opacity
        stroke_width=1,
        stroke_color="#FFAA00",
        background_color="#FFF",
        background_image=pil_image,
        update_streamlit=True,
        height=canvas_height,
        width=canvas_width,
        drawing_mode="rect",
        point_display_radius=0,
        key=canvas_hash,
    )

bbox_list = None
if whole_image:
    # "OCR image" overrides any drawn boxes and never needs the canvas data,
    # so json_data being None is harmless on this path.
    bbox_list = [(0, 0, pil_image.width, pil_image.height)]
elif canvas_result.json_data is not None:
    objects = pd.json_normalize(canvas_result.json_data["objects"])
    if objects.shape[0] > 0:
        # .copy() so the new columns are added to an independent frame rather than a slice.
        boxes = objects[objects["type"] == "rect"][["left", "top", "width", "height"]].copy()
        boxes["right"] = boxes["left"] + boxes["width"]
        boxes["bottom"] = boxes["top"] + boxes["height"]

        # The canvas shows the image scaled down to at most MAX_WIDTH wide, so the
        # drawn coordinates are in canvas pixels. Map them back to the pixel grid of
        # pil_image before cropping. The factor is 1.0 when no down-scaling happened.
        to_image_px = pil_image.width / canvas_width
        bbox_list = (boxes[["left", "top", "right", "bottom"]] * to_image_px).values.tolist()

if bbox_list:
    with col2:
        inferences = [infer_image(pil_image, bbox, temperature) for bbox in bbox_list]
        # Show the most recently drawn box first, numbered by drawing order.
        for idx, inference in enumerate(reversed(inferences)):
            st.markdown(f"### {len(inferences) - idx}")
            katex_markdown = replace_katex_invalid(inference)
            st.markdown(katex_markdown)
            st.code(inference)
            st.divider()

with col2:
    tips = """
    ### Usage tips
    - Don't make your boxes too small or too large. See the examples and the video in the [README](https://github.com/vikParuchuri/texify) for more info.
    - Texify is sensitive to how you draw the box around the text you want to OCR. If you get bad results, try selecting a slightly different box, or splitting the box into multiple.
    - You can try changing the temperature value on the left if you don't get good results. This controls how "creative" the model is.
    - Sometimes KaTeX won't be able to render an equation (red error text), but it will still be valid LaTeX. You can copy the LaTeX and render it elsewhere.
    """
    st.markdown(tips)