File size: 2,828 Bytes
1bad634
 
 
089c072
95b8d1f
529ecd0
 
ed533ad
1bad634
 
 
 
 
 
 
 
 
4385b6a
1bad634
 
 
bd0c3e4
1bad634
 
ed533ad
 
529ecd0
1bad634
 
 
 
decd734
 
 
8356f0a
decd734
 
 
 
 
 
 
 
1bad634
 
 
 
 
 
 
 
 
 
 
 
bea95b4
 
decd734
1bad634
 
 
 
9cf899b
ed533ad
 
529ecd0
 
 
ed533ad
 
 
529ecd0
ed533ad
1bad634
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from flask import Flask
from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
import torch
import spaces
import cv2
from doctr.io import DocumentFile
from doctr.models import detection_predictor

import os
import json
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename

# Folder name stored in the Flask config under 'UPLOAD_FOLDER'.
UPLOAD_FOLDER = "upload"

# Lower-case file extensions (without the leading dot) that allowed_file()
# accepts for an uploaded file.
ALLOWED_EXTENSIONS = {
    "txt",
    "pdf",
    "png",
    "jpg",
    "jpeg",
    "gif",
}

# Download the DocLayout-YOLO checkpoint from the Hugging Face Hub into /tmp
# and load it once at import time so that every request reuses the same model.
filepath = hf_hub_download(
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
    local_dir="/tmp",
    cache_dir="/tmp",
)
model = YOLOv10(filepath)

app = Flask(__name__)
# flash() stores its messages in the session, which Flask signs with this key.
# Prefer a key supplied through the environment; the literal fallback keeps the
# previous behaviour when FLASK_SECRET_KEY is not set but should not be relied
# on for a public deployment.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super secret key")
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


# docTR text-detection predictor (pretrained db_resnet50), also loaded once at
# import time and shared by all requests.
doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)


def allowed_file(filename):
    """Return True when *filename* ends in an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        # No dot at all, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

@spaces.GPU
def predict(filename):
    """Run the layout model on the image at *filename* and return its prediction.

    Inference uses CUDA when torch reports it as available and the CPU
    otherwise; the chosen device is printed on every call. The model is called
    with an image size of 1024 and a confidence threshold of 0.2.
    """
    backend = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(backend)
    print(device)
    options = {
        "imgsz": 1024,   # prediction image size
        "conf": 0.2,     # confidence threshold
        "device": device,
    }
    return model.predict(filename, **options)



def _extract_text_blocks(image_path):
    """Detect words inside every "plain text" layout region of an image.

    The image is decoded with OpenCV, the layout model is run through
    predict(), and each region labelled "plain text" is cropped and passed to
    the docTR detection predictor.

    Returns a dict mapping "block<i>" (``i`` is the index of the region among
    all detected regions) to the list form of the predictor's "words" output,
    or ``None`` when OpenCV cannot decode the file.
    """
    page = cv2.imread(image_path)
    if page is None:
        # cv2.imread signals an unreadable/unsupported file with None instead
        # of raising; stop before the models are run on it.
        return None
    # NOTE(review): cv2.imread yields BGR channel order -- confirm whether the
    # docTR predictor expects RGB input.

    detection = predict(image_path)[0]
    names = detection.names
    regions = zip(detection.boxes.cls, detection.boxes.xyxy)

    blocks = {}
    for i, (cls, box) in enumerate(regions):
        if names[int(cls)] != "plain text":
            continue
        x, y, x1, y1 = (int(v) for v in box.tolist())
        crop = page[y:y1, x:x1]
        if crop.size == 0:
            # Truncating the coordinates to int can leave a zero-area box.
            continue
        # The predictor takes a list of page arrays; iterating a bare 3-D
        # array would hand it 2-D rows instead of one image.
        result = doctr_model([crop])
        blocks["block" + str(i)] = result[0]["words"].tolist()
    return blocks


@app.route("/", methods=['GET', 'POST'])
def doc_layout():
    """Serve the upload form and process uploaded document images.

    GET, or a POST whose file has an extension rejected by allowed_file(),
    returns the HTML upload form. A POST without a ``file`` part, with an
    empty filename, or with a file OpenCV cannot decode flashes a message and
    redirects back to the same URL. A successful POST returns a JSON object
    mapping "block<i>" to the words detected in each "plain text" region.
    """
    # Imported here so this view stays self-contained; only the upload path
    # needs it.
    import tempfile

    if request.method == 'POST':
        if 'file' not in request.files:
            flash('no file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            # A per-request directory keeps concurrent uploads that share a
            # name from overwriting each other and removes the file afterwards.
            with tempfile.TemporaryDirectory() as workdir:
                safe_name = secure_filename(file.filename) or "upload"
                path = os.path.join(workdir, safe_name)
                file.save(path)
                blocks = _extract_text_blocks(path)
            if blocks is None:
                flash('Could not read the uploaded file as an image')
                return redirect(request.url)
            return app.response_class(
                json.dumps(blocks), mimetype="application/json"
            )
    return '''
    <!doctype html>
    <title>Upload new File</title>
    <h1>Upload new File</h1>
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
    '''


# Start Flask's built-in server only when this file is executed directly;
# it listens on every interface on port 8080.
if __name__ == "__main__":
    app.run(
        host='0.0.0.0',
        port=8080,
    )