# doclayout / app / app.py
# Sergey Mikhno
# args
# b3ab4bb
from flask import Flask
from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
import torch
import spaces
import cv2
from doctr.io import DocumentFile
from doctr.models import detection_predictor
import os
import json
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename
# Value stored under the Flask config key 'UPLOAD_FOLDER' below.
UPLOAD_FOLDER = 'upload'
# Lower-case file extensions that allowed_file() accepts.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

# Download the DocLayout-YOLO checkpoint from the Hugging Face Hub (using /tmp
# as both the local and the cache directory) and load it once at import time.
filepath = hf_hub_download(
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
    local_dir="/tmp",
    cache_dir="/tmp",
)
model = YOLOv10(filepath)

app = Flask(__name__)
# The secret key signs the session cookie that flash() relies on.  Allow the
# deployment to supply a real secret through the SECRET_KEY environment
# variable instead of relying on the value committed to source control; the
# previous literal is kept only as a fallback so existing setups keep working.
app.secret_key = os.environ.get("SECRET_KEY", "super secret key")
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# doctr text detection predictor (db_resnet50 architecture, pretrained weights).
doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)
def allowed_file(filename):
    """Tell whether *filename* carries an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot; it is lower-cased before
    the lookup.  A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
@spaces.GPU
def predict(filename):
    """Run the layout model on the image stored at *filename*.

    CUDA is used when torch reports it as available, otherwise the CPU; the
    selected device is printed.  The return value is whatever
    ``model.predict`` returns.
    """
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")
    print(target)

    settings = {
        "imgsz": 1024,     # prediction image size
        "conf": 0.2,       # confidence threshold
        "device": target,
    }
    return model.predict(filename, **settings)
def _ocr_text_blocks(detection, image):
    """Run ``doctr_model`` on every "plain text" region of *image*.

    *detection* is a single layout result exposing ``names``, ``boxes.cls``
    and ``boxes.xyxy``; *image* is the array returned by ``cv2.imread``.
    Returns a dict mapping ``"block<i>"`` (``i`` is the region's position
    among all detected regions) to ``result[0]["words"].tolist()``.
    """
    class_names = detection.names
    labels = [class_names[int(c)] for c in detection.boxes.cls]
    boxes = [b.tolist() for b in detection.boxes.xyxy]
    height, width = image.shape[:2]

    blocks = {}
    for index, (label, box) in enumerate(zip(labels, boxes)):
        if label != "plain text":
            continue
        left, top, right, bottom = (int(v) for v in box)
        # Clamp to the image: a negative start index would silently wrap
        # around in NumPy slicing and crop the wrong area.
        left, top = max(left, 0), max(top, 0)
        right, bottom = min(right, width), min(bottom, height)
        if right <= left or bottom <= top:
            # Degenerate box after integer truncation; nothing to analyse.
            continue
        crop = image[top:bottom, left:right]
        result = doctr_model([crop])
        blocks["block" + str(index)] = result[0]["words"].tolist()
    return blocks


@app.route("/", methods=['GET', 'POST'])
def doc_layout():
    """Serve the upload form and process uploaded documents.

    GET returns a minimal HTML upload form.  POST expects a multipart field
    named ``file``; when its extension passes ``allowed_file`` the image is
    run through ``predict`` and each "plain text" region through
    ``doctr_model``, and the result is returned as a JSON object.  A missing
    or empty upload flashes a message and redirects back to the same URL.  An
    upload that cannot be decoded as an image yields a 400 JSON error.
    """
    import tempfile  # local import: only needed by this handler

    json_headers = {"Content-Type": "application/json"}

    if request.method == 'POST':
        if 'file' not in request.files:
            flash('no file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            # Store the upload under a unique name so that simultaneous
            # uploads with the same client filename cannot overwrite each
            # other, and so the file can be deleted safely afterwards.  The
            # extension was just validated against the allow-list, which
            # makes it safe to reuse as the suffix.
            extension = file.filename.rsplit('.', 1)[1].lower()
            handle, filename = tempfile.mkstemp(suffix="." + extension, dir="/tmp")
            os.close(handle)
            try:
                file.save(filename)
                pimg = cv2.imread(filename)
                if pimg is None:
                    # cv2.imread signals an unreadable file by returning None.
                    error = {"error": "uploaded file could not be read as an image"}
                    return json.dumps(error), 400, json_headers
                det_res = predict(filename)
                blocks = _ocr_text_blocks(det_res[0], pimg)
            finally:
                # Never leave uploads behind in /tmp.
                os.remove(filename)
            return json.dumps(blocks), 200, json_headers
        # A disallowed extension falls through and re-renders the form.
    return '''
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
<form method=post enctype=multipart/form-data>
<input type=file name=file>
<input type=submit value=Upload>
</form>
'''
if __name__ == "__main__":
    # Executed directly as a script: start Flask's built-in server on all
    # interfaces, port 8080.
    app.run(port=8080, host='0.0.0.0')