Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,18 +4,14 @@ import os
|
|
| 4 |
import TDTSR
|
| 5 |
import pytesseract
|
| 6 |
from pytesseract import Output
|
| 7 |
-
import postprocess as pp
|
| 8 |
import pandas as pd
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
import cv2
|
| 11 |
import numpy as np
|
| 12 |
-
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
| 13 |
from cv2 import dnn_superres
|
| 14 |
|
| 15 |
# Default install location of Tesseract on Windows. Only point pytesseract at
# it when the executable is actually there; on any other host pytesseract
# keeps its default command and resolves `tesseract` through PATH.
# (`os` is imported at the top of app.py.)
WINDOWS_TESSERACT_EXE = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(WINDOWS_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = WINDOWS_TESSERACT_EXE

# Page-level Streamlit configuration: turn off the pyplot global-use
# deprecation option, use the wide layout, and render the page title.
st.set_option('deprecation.showPyplotGlobalUse', False)
st.set_page_config(layout='wide')
st.title("Table Detection and Table Structure Recognition")
|
|
@@ -32,19 +28,6 @@ def cv_to_PIL(cv_img):
|
|
| 32 |
def pytess(cell_pil_img):
    """Run Tesseract on one table-cell image and return the recognised text.

    Args:
        cell_pil_img: Image of a single cell, handed unchanged to
            ``pytesseract.image_to_data`` (presumably a PIL image, going by
            the parameter name -- confirm against the caller).

    Returns:
        All entries of the ``'text'`` field of Tesseract's data output,
        joined with single spaces, with surrounding whitespace stripped.
    """
    # Tesseract variables have to be passed as ``-c name=value``. A bare
    # variable name in ``config`` is forwarded as a config-file name instead,
    # so the setting would never take effect.
    ocr_data = pytesseract.image_to_data(
        cell_pil_img,
        output_type=Output.DICT,
        config='-c preserve_interword_spaces=1',
    )
    return ' '.join(ocr_data['text']).strip()
|
| 34 |
|
| 35 |
-
_TROCR_CHECKPOINT = "SalML/trocr-base-printed"
_TROCR_COMPONENTS = {}


def _load_trocr():
    """Return the ``(processor, model)`` pair, loading the checkpoint once."""
    if not _TROCR_COMPONENTS:
        # Load both parts before publishing them so a failed load never
        # leaves a half-filled cache behind.
        processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
        model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
        _TROCR_COMPONENTS.update(processor=processor, model=model)
    return _TROCR_COMPONENTS["processor"], _TROCR_COMPONENTS["model"]


def TrOCR(cell_pil_img):
    """Recognise the text of one table-cell image with the TrOCR model.

    The processor and model are loaded on first use and reused afterwards,
    instead of being re-created from the checkpoint for every cell.
    NOTE(review): the cache lives in module globals; if the hosting framework
    re-executes this script, the load happens once per execution -- confirm
    whether a framework-level resource cache is preferable.

    Args:
        cell_pil_img: Image of a single cell, passed to the processor as
            ``images`` (presumably a PIL image -- confirm against the caller).

    Returns:
        The first decoded sequence produced by the model, with special
        tokens skipped.
    """
    processor, model = _load_trocr()
    pixel_values = processor(images=cell_pil_img, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
def super_res(pil_img):
|
| 49 |
# requires opencv-contrib-python installed without the opencv-python
|
| 50 |
sr = dnn_superres.DnnSuperResImpl_create()
|
|
|
|
| 4 |
import TDTSR
|
| 5 |
import pytesseract
|
| 6 |
from pytesseract import Output
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
import cv2
|
| 10 |
import numpy as np
|
|
|
|
| 11 |
from cv2 import dnn_superres
|
| 12 |
|
| 13 |
# Default install location of Tesseract on Windows. Only point pytesseract at
# it when the executable is actually there; on any other host pytesseract
# keeps its default command and resolves `tesseract` through PATH.
# (`os` is imported at the top of app.py.)
WINDOWS_TESSERACT_EXE = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(WINDOWS_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = WINDOWS_TESSERACT_EXE

# Page-level Streamlit configuration: turn off the pyplot global-use
# deprecation option, use the wide layout, and render the page title.
st.set_option('deprecation.showPyplotGlobalUse', False)
st.set_page_config(layout='wide')
st.title("Table Detection and Table Structure Recognition")
|
|
|
|
| 28 |
def pytess(cell_pil_img):
    """Run Tesseract on one table-cell image and return the recognised text.

    Args:
        cell_pil_img: Image of a single cell, handed unchanged to
            ``pytesseract.image_to_data`` (presumably a PIL image, going by
            the parameter name -- confirm against the caller).

    Returns:
        All entries of the ``'text'`` field of Tesseract's data output,
        joined with single spaces, with surrounding whitespace stripped.
    """
    # Tesseract variables have to be passed as ``-c name=value``. A bare
    # variable name in ``config`` is forwarded as a config-file name instead,
    # so the setting would never take effect.
    ocr_data = pytesseract.image_to_data(
        cell_pil_img,
        output_type=Output.DICT,
        config='-c preserve_interword_spaces=1',
    )
    return ' '.join(ocr_data['text']).strip()
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def super_res(pil_img):
|
| 32 |
# requires opencv-contrib-python installed without the opencv-python
|
| 33 |
sr = dnn_superres.DnnSuperResImpl_create()
|