"""Flask service that locates "plain text" regions in an uploaded page image.

A DocLayout-YOLO model detects layout blocks on the page; every block labelled
"plain text" is cropped and handed to a docTR detection predictor, and the
per-block results are returned to the client as JSON.
"""
# NOTE(review): the third-party import order below is kept exactly as in the
# original file in case any of these packages is sensitive to it.
from flask import Flask, flash, request, redirect, url_for
from doclayout_yolo import YOLOv10
from huggingface_hub import hf_hub_download
import torch
import spaces
import cv2
from doctr.io import DocumentFile
from doctr.models import detection_predictor
import os
import json
import tempfile
from werkzeug.utils import secure_filename

UPLOAD_FOLDER = 'upload'
# NOTE(review): 'txt', 'pdf' and 'gif' pass this filter, but uploads are read
# with cv2.imread; files it cannot decode are rejected with HTTP 400 below.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

# Layout-detection weights are fetched from the Hugging Face Hub at import time.
filepath = hf_hub_download(
    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
    local_dir="/tmp",
    cache_dir="/tmp",
)
model = YOLOv10(filepath)

app = Flask(__name__)
# SECURITY: the literal fallback is public and only kept for backward
# compatibility; set FLASK_SECRET_KEY in any real deployment.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super secret key")
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)

# Page served for GET requests (and for POSTs with a disallowed extension).
# The form field is named "file" because doc_layout() reads request.files['file'].
_UPLOAD_PAGE = '''
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
<form method=post enctype=multipart/form-data>
  <input type=file name=file>
  <input type=submit value=Upload>
</form>
'''


def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and uses the text after the last dot.
    """
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@spaces.GPU
def predict(filename):
    """Run the layout model on the image at *filename* and return its results.

    Uses CUDA when torch reports it as available, otherwise the CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    return model.predict(
        filename,    # Image to predict
        imgsz=1024,  # Prediction image size
        conf=0.2,    # Confidence threshold
        device=device,
    )


def _json_response(payload, status=200):
    """Serialize *payload* with json.dumps and label it as application/json."""
    return app.response_class(
        json.dumps(payload), status=status, mimetype="application/json"
    )


def _detect_text_blocks(image_path):
    """Return ``{"block<i>": result}`` for each "plain text" layout block.

    ``i`` is the block's index among *all* detections, not only the text ones.
    Returns None when OpenCV cannot decode the file as an image.
    """
    page = cv2.imread(image_path)
    if page is None:
        # cv2.imread signals an unreadable/unsupported file by returning None.
        return None

    detection = predict(image_path)[0]
    names = detection.names
    blocks = {}
    for i, (cls_id, box) in enumerate(
        zip(detection.boxes.cls, detection.boxes.xyxy)
    ):
        if names[int(cls_id)] != "plain text":
            continue
        x, y, x1, y1 = (int(v) for v in box.tolist())
        crop = page[y:y1, x:x1]
        if crop.size == 0:
            # A degenerate box yields an empty crop; nothing to analyse.
            continue
        result = doctr_model([crop])
        # presumably the detected word boxes for this crop — verify against
        # the docTR detection_predictor documentation.
        blocks["block" + str(i)] = result[0]["words"].tolist()
    return blocks


@app.route("/", methods=['GET', 'POST'])
def doc_layout():
    """Serve the upload form (GET) or analyse an uploaded image (POST).

    POST expects a multipart field named ``file``. A missing field or empty
    filename flashes a message and redirects back to the same URL; a
    disallowed extension re-serves the upload page. On success the response
    is a JSON object of per-block results; a file that cannot be decoded as
    an image yields HTTP 400 with a JSON error.
    """
    if request.method != 'POST':
        return _UPLOAD_PAGE

    if 'file' not in request.files:
        flash('no file part')
        return redirect(request.url)
    file = request.files['file']
    if file.filename == '':
        flash('No selected file')
        return redirect(request.url)
    if not (file and allowed_file(file.filename)):
        return _UPLOAD_PAGE

    # The extension comes from the whitelist, so it is safe to embed in a
    # path; the rest of the client-supplied name is deliberately not used.
    extension = file.filename.rsplit('.', 1)[1].lower()
    # A unique temp path avoids clobbering concurrent uploads that share a name.
    fd, tmp_path = tempfile.mkstemp(suffix="." + extension, dir="/tmp")
    os.close(fd)
    try:
        file.save(tmp_path)
        blocks = _detect_text_blocks(tmp_path)
    finally:
        # Always remove the upload so /tmp does not grow without bound.
        os.remove(tmp_path)

    if blocks is None:
        return _json_response(
            {"error": "uploaded file could not be read as an image"},
            status=400,
        )
    return _json_response(blocks)


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=8080)