# Source: app.py by onursavas, commit 729bdca ("Update app.py"), 920 bytes.
import gradio as gr
import os
import cv2
from paddleocr import PPStructure,save_structure_res
from paddleocr.ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
# Shared PP-Structure engine, constructed once at import time and called by
# find_layout for each image. No `lang` is passed here; per the original note
# this configuration is the one meant for Chinese images.
# NOTE(review): `recovery=True` presumably enables the layout-recovery data
# consumed by sorted_layout_boxes / convert_info_docx; confirm in PaddleOCR docs.
table_engine = PPStructure(recovery=True)
# For English images, construct the engine like this instead:
# table_engine = PPStructure(recovery=True, lang='en')
def find_layout(image):
    """Run PP-Structure layout analysis on an image and return the regions as text.

    Besides returning text, this saves the structure results via
    ``save_structure_res`` and calls ``convert_info_docx``, both targeting the
    ``./output`` folder with the fixed base name ``result``. Each region is
    also printed.

    Args:
        image: The page to analyse. Accepts a PIL image (what the
            ``gr.Image(type="pil")`` input delivers), a filesystem path, or a
            NumPy array that is assumed to already be in OpenCV's BGR channel
            order. ``None`` (nothing uploaded) is tolerated.

    Returns:
        One line per detected region, each line being ``str()`` of the region
        dict with its ``'img'`` entry removed. Returns ``""`` when ``image``
        is ``None``.

    Raises:
        ValueError: If ``image`` is a path that OpenCV cannot read.
    """
    # The UI passes None when the user submits without uploading anything.
    if image is None:
        return ""

    # Local import keeps the file's top-level import block untouched.
    import numpy as np

    save_folder = './output'
    base_name = "result"

    if isinstance(image, (str, os.PathLike)):
        img = cv2.imread(os.fspath(image))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image file: {image!r}")
    elif isinstance(image, np.ndarray):
        img = image
    else:
        # PIL image: normalise to 3-channel RGB, then swap to the BGR order
        # that cv2.imread would have produced.
        img = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)

    result = table_engine(img)
    save_structure_res(result, save_folder, base_name)

    region_lines = []
    for region in result:
        # Drop the cropped image array so only the region metadata is
        # printed and returned.
        region.pop('img', None)
        print(region)
        # Regions are dicts; they must be stringified before joining.
        region_lines.append(str(region))

    # Only the width is needed; indexing also works for 2-D (grayscale) arrays.
    width = img.shape[1]
    sorted_regions = sorted_layout_boxes(result, width)
    convert_info_docx(img, sorted_regions, save_folder, base_name)
    return "\n".join(region_lines)
# Web UI: one image upload, handed to find_layout as a PIL image, with the
# function's return value shown in a plain-text output.
iface = gr.Interface(
    fn=find_layout,
    inputs=[gr.Image(type="pil")],
    outputs="text",
)
iface.launch()