# File size: 2,830 Bytes
# 1bad634 089c072 95b8d1f 529ecd0 ed533ad 1bad634 4385b6a 1bad634 bd0c3e4 1bad634 ed533ad 529ecd0 1bad634 decd734 8356f0a decd734 1bad634 bea95b4 decd734 1bad634 9cf899b ed533ad 529ecd0 ed533ad b3ab4bb ed533ad 529ecd0 ed533ad 1bad634 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
from flask import Flask
from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
import torch
import spaces
import cv2
from doctr.io import DocumentFile
from doctr.models import detection_predictor
import os
import json
from flask import Flask, flash, request, redirect, url_for
from werkzeug.utils import secure_filename
# Directory name registered under app.config['UPLOAD_FOLDER'] below.
# NOTE(review): nothing in the visible code reads that setting; the request
# handler writes uploads under /tmp — confirm whether this is still needed.
UPLOAD_FOLDER = 'upload'

# Lower-case file extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

# Download the DocLayout-YOLO checkpoint into /tmp and load it once at import
# time so every request reuses the same model instance.
filepath = hf_hub_download(repo_id="juliozhao/DocLayout-YOLO-DocStructBench", filename="doclayout_yolo_docstructbench_imgsz1024.pt", local_dir="/tmp", cache_dir="/tmp")
model = YOLOv10(filepath)

app = Flask(__name__)
# The secret key signs the session cookie that flash() stores its messages in.
# Prefer a value supplied through the environment so deployments do not all
# share a publicly known key; the historical literal remains the fallback so
# existing setups keep working unchanged.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super secret key")
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# docTR detection predictor (db_resnet50, pretrained weights), also loaded
# once at import time and shared by all requests.
doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)
def allowed_file(filename):
    """Tell whether *filename* carries an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last '.', lower-cased before the
    lookup. A name containing no '.' at all is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        # rpartition yields an empty separator when no '.' is present.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS
@spaces.GPU
def predict(filename):
    """Run the module-level layout model on a single image.

    Args:
        filename: Image to predict, forwarded as-is to ``model.predict``.

    Returns:
        Whatever ``model.predict`` returns for that input.
    """
    # Use CUDA when torch reports it as available, otherwise stay on the CPU.
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")
    print(target)

    options = dict(
        imgsz=1024,     # Prediction image size
        conf=0.2,       # Confidence threshold
        device=target,
    )
    return model.predict(filename, **options)
@app.route("/", methods=['GET', 'POST'])
def doc_layout():
    """Serve the upload form and run layout + text detection on uploads.

    GET (or a POST whose file extension is not allowed) returns a minimal
    HTML upload form. A POST without a ``file`` part, or with an empty
    filename, flashes a message and redirects back to the same URL.

    For an accepted upload the image is passed to ``predict``; every detected
    region labelled ``"plain text"`` is cropped out of the image and handed to
    ``doctr_model``. The response is a JSON object mapping ``"block<i>"``
    (``i`` being the index of the region among all detections) to the
    ``"words"`` entry of the docTR result for that crop, converted to a list.

    An upload that OpenCV cannot decode yields a 400 response with a JSON
    ``error`` message instead of an unhandled exception.
    """
    # Function-scope import so this handler does not depend on a change to the
    # file's import block.
    import tempfile

    json_header = {"Content-Type": "application/json"}

    if request.method == 'POST':
        if 'file' not in request.files:
            flash('no file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            # Store the upload under a unique name so concurrent requests that
            # upload identically named files cannot overwrite each other.
            # Only the (already validated) extension is taken from the client.
            suffix = "." + file.filename.rsplit('.', 1)[1].lower()
            fd, filename = tempfile.mkstemp(suffix=suffix, dir="/tmp")
            os.close(fd)
            try:
                file.save(filename)
                # Decode first: cv2.imread returns None for anything it cannot
                # read, which would otherwise fail later when slicing.
                pimg = cv2.imread(filename)
                if pimg is None:
                    return (
                        json.dumps({"error": "uploaded file could not be read as an image"}),
                        400,
                        json_header,
                    )
                det_res = predict(filename)
            finally:
                # The file is only needed while decoding and predicting.
                os.remove(filename)

            detection = det_res[0]
            names = detection.names
            blocks = {}
            for i, (cls_id, box) in enumerate(zip(detection.boxes.cls, detection.boxes.xyxy)):
                if names[int(cls_id)] != "plain text":
                    continue
                x, y, x1, y1 = (int(v) for v in box.tolist())
                # A negative start index would wrap around in array slicing.
                x, y = max(x, 0), max(y, 0)
                block = pimg[y:y1, x:x1]
                if block.size == 0:
                    # Zero-area crop: nothing to run text detection on.
                    continue
                result = doctr_model([block])
                blocks["block" + str(i)] = result[0]["words"].tolist()
            # Declare the payload as JSON rather than the default HTML type.
            return json.dumps(blocks), 200, json_header
    return '''
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
<form method=post enctype=multipart/form-data>
<input type=file name=file>
<input type=submit value=Upload>
</form>
'''
if __name__ == "__main__":
    # Direct execution only: start Flask's built-in server on port 8080,
    # bound to all interfaces.
    app.run(
        port=8080,
        host='0.0.0.0',
    )
# |