File size: 2,830 Bytes
1bad634
 
 
089c072
95b8d1f
529ecd0
 
ed533ad
1bad634
 
 
 
 
 
 
 
 
4385b6a
1bad634
 
 
bd0c3e4
1bad634
 
ed533ad
 
529ecd0
1bad634
 
 
 
decd734
 
 
8356f0a
decd734
 
 
 
 
 
 
 
1bad634
 
 
 
 
 
 
 
 
 
 
 
bea95b4
 
decd734
1bad634
 
 
 
9cf899b
ed533ad
 
529ecd0
 
 
ed533ad
b3ab4bb
ed533ad
529ecd0
ed533ad
1bad634
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from flask import Flask
from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
import torch
import spaces
import cv2
from doctr.io import DocumentFile
from doctr.models import detection_predictor

import os
import json
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

# Folder name registered in the Flask config below.
# NOTE(review): the upload handler in this file writes under /tmp rather than
# reading this setting -- confirm whether it is still needed.
UPLOAD_FOLDER = 'upload'
# Lower-case file extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

# Fetch the DocLayout-YOLO weights at import time and build the layout model.
filepath = hf_hub_download(repo_id="juliozhao/DocLayout-YOLO-DocStructBench", filename="doclayout_yolo_docstructbench_imgsz1024.pt", local_dir="/tmp", cache_dir="/tmp")
model = YOLOv10(filepath)

app = Flask(__name__)
# The secret key signs the session cookie that flash() relies on. Read it from
# the environment so deployments do not ship the literal committed in source;
# the original value is kept only as a backward-compatible fallback.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super secret key")
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


# Pretrained db_resnet50 detection predictor, applied to cropped regions.
doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)


def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last ``.`` and is compared
    case-insensitively; a name without any ``.`` is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No separator at all, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

@spaces.GPU
def predict(filename):
    """Run the module-level layout model on the image at *filename*.

    Uses CUDA when ``torch`` reports it as available and the CPU otherwise,
    prints the chosen device, and returns whatever ``model.predict`` yields.
    """
    if torch.cuda.is_available():
        backend = "cuda"
    else:
        backend = "cpu"
    device = torch.device(backend)
    print(device)

    options = {
        "imgsz": 1024,   # prediction image size
        "conf": 0.2,     # confidence threshold
        "device": device,
    }
    return model.predict(filename, **options)



@app.route("/", methods=['GET', 'POST'])
def doc_layout():
    """Serve the upload form and run layout + text detection on uploads.

    GET returns a minimal HTML upload form. POST expects a multipart ``file``
    field: the upload is passed to ``predict`` and every detected region
    labelled ``"plain text"`` is cropped and handed to ``doctr_model``.

    Returns:
        * a redirect back to this URL when the ``file`` part is missing or
          has an empty filename;
        * a 400 JSON error when the upload cannot be decoded as an image;
        * on success, a JSON object mapping ``"block<i>"`` (``i`` is the
          index of the detection in the layout result) to the ``"words"``
          output of ``doctr_model`` for that crop;
        * otherwise (GET, or a disallowed extension) the HTML upload form.
    """
    import tempfile  # local import: only needed on the upload path

    if request.method == 'POST':
        if 'file' not in request.files:
            flash('no file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            # allowed_file() has confirmed the extension is one of
            # ALLOWED_EXTENSIONS, so it is safe to reuse as the suffix. A
            # unique temp name avoids collisions between concurrent uploads
            # that share a filename.
            extension = file.filename.rsplit('.', 1)[1].lower()
            fd, filename = tempfile.mkstemp(suffix='.' + extension, dir='/tmp')
            os.close(fd)
            try:
                file.save(filename)
                pimg = cv2.imread(filename)
                if pimg is None:
                    # cv2.imread signals an undecodable file (e.g. txt/pdf)
                    # by returning None instead of raising.
                    return app.response_class(
                        json.dumps({"error": "uploaded file could not be read as an image"}),
                        status=400,
                        mimetype="application/json",
                    )
                det_res = predict(filename)
            finally:
                # Always drop the temporary upload so /tmp does not fill up.
                os.remove(filename)

            layout = det_res[0]
            names = layout.names
            blocks = {}
            for i, (cls_id, box) in enumerate(zip(layout.boxes.cls, layout.boxes.xyxy)):
                if names[int(cls_id)] != "plain text":
                    continue
                # Clamp at 0 so a slightly negative coordinate cannot wrap
                # around as a negative slice index.
                x, y, x1, y1 = (max(int(v), 0) for v in box.tolist())
                block = pimg[y:y1, x:x1]
                if block.size == 0:
                    # Degenerate box: nothing to run text detection on.
                    continue
                result = doctr_model([block])
                blocks["block" + str(i)] = result[0]["words"].tolist()

            return app.response_class(json.dumps(blocks), mimetype="application/json")
    return '''
    <!doctype html>
    <title>Upload new File</title>
    <h1>Upload new File</h1>
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
    '''


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=8080)