Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -4,18 +4,14 @@ import os
|
|
4 |
import TDTSR
|
5 |
import pytesseract
|
6 |
from pytesseract import Output
|
7 |
-
import postprocess as pp
|
8 |
import pandas as pd
|
9 |
import matplotlib.pyplot as plt
|
10 |
import cv2
|
11 |
import numpy as np
|
12 |
-
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
13 |
from cv2 import dnn_superres
|
14 |
|
15 |
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
16 |
|
17 |
-
|
18 |
-
|
19 |
st.set_option('deprecation.showPyplotGlobalUse', False)
|
20 |
st.set_page_config(layout='wide')
|
21 |
st.title("Table Detection and Table Structure Recognition")
|
@@ -32,19 +28,6 @@ def cv_to_PIL(cv_img):
|
|
32 |
def pytess(cell_pil_img):
    """Run Tesseract OCR on one cell image and return its text as one string.

    The per-entry ``text`` values reported by ``pytesseract.image_to_data``
    are joined with single spaces and the result is stripped of leading and
    trailing whitespace.

    Args:
        cell_pil_img: Image handed straight to ``pytesseract.image_to_data``
            (presumably a PIL image of a single table cell -- confirm
            against the caller).

    Returns:
        str: The joined, stripped OCR text.
    """
    # Tesseract variables must be passed as ``-c NAME=VALUE``. The bare word
    # ``preserve_interword_spaces`` is treated as a config *file* name, so
    # the option was never actually applied.
    ocr_data = pytesseract.image_to_data(
        cell_pil_img,
        output_type=Output.DICT,
        config='-c preserve_interword_spaces=1',
    )
    return ' '.join(ocr_data['text']).strip()
|
34 |
|
35 |
-
def TrOCR(cell_pil_img):
|
36 |
-
|
37 |
-
processor = TrOCRProcessor.from_pretrained("SalML/trocr-base-printed")
|
38 |
-
model = VisionEncoderDecoderModel.from_pretrained("SalML/trocr-base-printed")
|
39 |
-
pixel_values = processor(images=cell_pil_img, return_tensors="pt").pixel_values
|
40 |
-
|
41 |
-
generated_ids = model.generate(pixel_values)
|
42 |
-
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
43 |
-
|
44 |
-
return generated_text
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
def super_res(pil_img):
|
49 |
# requires opencv-contrib-python installed without the opencv-python
|
50 |
sr = dnn_superres.DnnSuperResImpl_create()
|
|
|
4 |
import TDTSR
|
5 |
import pytesseract
|
6 |
from pytesseract import Output
|
|
|
7 |
import pandas as pd
|
8 |
import matplotlib.pyplot as plt
|
9 |
import cv2
|
10 |
import numpy as np
|
|
|
11 |
from cv2 import dnn_superres
|
12 |
|
13 |
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
14 |
|
|
|
|
|
15 |
st.set_option('deprecation.showPyplotGlobalUse', False)
|
16 |
st.set_page_config(layout='wide')
|
17 |
st.title("Table Detection and Table Structure Recognition")
|
|
|
28 |
def pytess(cell_pil_img):
    """Run Tesseract OCR on one cell image and return its text as one string.

    The per-entry ``text`` values reported by ``pytesseract.image_to_data``
    are joined with single spaces and the result is stripped of leading and
    trailing whitespace.

    Args:
        cell_pil_img: Image handed straight to ``pytesseract.image_to_data``
            (presumably a PIL image of a single table cell -- confirm
            against the caller).

    Returns:
        str: The joined, stripped OCR text.
    """
    # Tesseract variables must be passed as ``-c NAME=VALUE``. The bare word
    # ``preserve_interword_spaces`` is treated as a config *file* name, so
    # the option was never actually applied.
    ocr_data = pytesseract.image_to_data(
        cell_pil_img,
        output_type=Output.DICT,
        config='-c preserve_interword_spaces=1',
    )
    return ' '.join(ocr_data['text']).strip()
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
def super_res(pil_img):
|
32 |
# requires opencv-contrib-python installed without the opencv-python
|
33 |
sr = dnn_superres.DnnSuperResImpl_create()
|