|
from flask import Flask |
|
from doclayout_yolo import YOLOv10 |
|
from huggingface_hub import hf_hub_download |
|
import torch |
|
import spaces |
|
import cv2 |
|
from doctr.io import DocumentFile |
|
from doctr.models import detection_predictor |
|
|
|
import os |
|
import json |
|
from flask import Flask, flash, request, redirect, url_for |
|
from werkzeug.utils import secure_filename |
|
|
|
# Directory name registered with Flask under the 'UPLOAD_FOLDER' config key.
UPLOAD_FOLDER = 'upload'

# Extensions accepted by allowed_file() (compared in lower case).
# NOTE(review): 'txt', 'pdf' and 'gif' are accepted here, but the request
# handler decodes uploads with cv2.imread -- confirm these formats are really
# meant to be supported.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

# Fetch the DocLayout-YOLO checkpoint from the Hugging Face Hub (stored under
# /tmp) and load it once at import time so every request reuses the same model.
filepath = hf_hub_download(
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
    local_dir="/tmp",
    cache_dir="/tmp",
)
model = YOLOv10(filepath)

app = Flask(__name__)

# The session/flash signing key must not be a literal committed to source.
# Read it from the environment; when it is not provided, fall back to a random
# per-process key (flash messages then do not survive a restart and are not
# shared between worker processes -- set FLASK_SECRET_KEY to avoid that).
app.secret_key = os.environ.get("FLASK_SECRET_KEY") or os.urandom(32).hex()
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# docTR text-detection predictor, also loaded once and shared by all requests.
doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)
|
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* carries an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last '.' in the name; it is
    lower-cased before the lookup. A name without any '.' is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
@spaces.GPU
def predict(filename):
    """Run the module-level layout model on *filename* and return its result.

    The model is asked to run on CUDA when torch reports it as available and
    on the CPU otherwise; the chosen device is printed for diagnostics.
    Inference uses an image size of 1024 and a confidence threshold of 0.2.
    """
    backend = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(backend)
    print(device)
    return model.predict(filename, imgsz=1024, conf=0.2, device=device)
|
|
|
|
|
|
|
@app.route("/", methods=['GET', 'POST'])
def doc_layout():
    """Serve the upload form, or run layout + text detection on an upload.

    GET (and POST requests whose file has a disallowed extension) return the
    HTML upload form.

    POST with a valid ``file`` part:
      * the upload is written to a private temporary file under /tmp, which is
        always removed before the response is returned;
      * ``predict`` is run on it, and every detected region labelled
        ``"plain text"`` is cropped and passed to the docTR detection
        predictor;
      * the response is a JSON object mapping ``"block<i>"`` (``i`` is the
        region's index among *all* detections) to the predictor's ``"words"``
        array for that crop.

    Error handling:
      * missing ``file`` part or empty filename -> flash a message and
        redirect back to the form;
      * upload that cv2 cannot decode as an image -> HTTP 415 with a JSON
        ``{"error": ...}`` body.
    """
    import tempfile  # function-scope import keeps this handler self-contained

    if request.method == 'POST':
        if 'file' not in request.files:
            flash('no file part')
            return redirect(request.url)

        upload = request.files['file']
        if upload.filename == '':
            flash('No selected file')
            return redirect(request.url)

        if upload and allowed_file(upload.filename):
            # Only the extension (already validated by allowed_file) is taken
            # from the client-supplied name. A unique temp file avoids
            # collisions between concurrent uploads sharing a name and keeps
            # the extension even when the rest of the name is not ASCII.
            suffix = "." + upload.filename.rsplit('.', 1)[1].lower()
            fd, filename = tempfile.mkstemp(suffix=suffix, dir="/tmp")
            try:
                with os.fdopen(fd, "wb") as out:
                    upload.save(out)

                # Decode first: cv2.imread returns None instead of raising
                # when the file is not a readable image.
                pimg = cv2.imread(filename)
                if pimg is None:
                    return app.response_class(
                        json.dumps({"error": "uploaded file could not be decoded as an image"}),
                        status=415,
                        mimetype="application/json",
                    )

                detection = predict(filename)[0]
                names = detection.names

                blocks = {}
                pairs = zip(detection.boxes.cls, detection.boxes.xyxy)
                for i, (cls_id, box) in enumerate(pairs):
                    if names[int(cls_id)] != "plain text":
                        continue
                    # Clamp at 0 so a slightly negative coordinate cannot wrap
                    # around in the NumPy slice; the upper bounds are clipped
                    # by slicing itself.
                    x0, y0, x1, y1 = (max(0, int(v)) for v in box.tolist())
                    crop = pimg[y0:y1, x0:x1]
                    if crop.size == 0:
                        # Degenerate box: nothing to run text detection on.
                        continue
                    # NOTE(review): cv2.imread yields BGR channel order --
                    # confirm whether the docTR predictor expects RGB input.
                    result = doctr_model([crop])
                    blocks[f"block{i}"] = result[0]["words"].tolist()
            finally:
                # Never leave uploads behind on disk.
                os.remove(filename)

            # Label the payload as JSON rather than Flask's default text/html.
            return app.response_class(json.dumps(blocks), mimetype="application/json")

    return '''
    <!doctype html>
    <title>Upload new File</title>
    <h1>Upload new File</h1>
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
    '''
|
|
|
|
|
if __name__ == "__main__":
    # Start Flask's built-in server when the file is executed directly,
    # bound to every network interface on port 8080.
    app.run(port=8080, host="0.0.0.0")
|
|