Spaces:

ssppkenny
/

doclayout

Sleeping

doclayout / app /app.py

Sergey Mikhno

args

b3ab4bb 20 days ago

2.83 kB

	from flask import Flask
	from doclayout_yolo import YOLOv10
	from huggingface_hub import hf_hub_download
	import torch
	import spaces
	import cv2
	from doctr.io import DocumentFile
	from doctr.models import detection_predictor

	import os
	import json
	from flask import Flask, flash, request, redirect, url_for
	from werkzeug.utils import secure_filename

	# Folder name stored in the Flask config under 'UPLOAD_FOLDER' below.
	UPLOAD_FOLDER = 'upload'
	# Lower-case filename suffixes that allowed_file() accepts.
	# NOTE(review): txt/pdf/gif are accepted here, but the upload handler reads
	# the saved file with cv2.imread -- confirm these suffixes are intended.
	ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}

	# Download the DocLayout-YOLO checkpoint into /tmp at import time and load
	# it once, so every request reuses the same model object.
	filepath = hf_hub_download(
	    repo_id="juliozhao/DocLayout-YOLO-DocStructBench",
	    filename="doclayout_yolo_docstructbench_imgsz1024.pt",
	    local_dir="/tmp",
	    cache_dir="/tmp",
	)
	model = YOLOv10(filepath)

	app = Flask(__name__)
	# Flask signs the session cookie (which carries flash() messages) with this
	# key, so it should not live in source control. Read it from the environment
	# when available; the historical literal remains as a fallback so existing
	# deployments keep starting without extra configuration.
	app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super secret key")
	app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


	# doctr detection predictor (db_resnet50, pretrained weights), also built
	# once at import time and shared by all requests.
	doctr_model = detection_predictor(arch='db_resnet50', pretrained=True)


	def allowed_file(filename):
	    """Report whether ``filename`` ends in an accepted extension.

	    The text after the last ``.`` is lower-cased and looked up in
	    ``ALLOWED_EXTENSIONS``. A name that contains no ``.`` at all is
	    rejected.
	    """
	    _stem, dot, suffix = filename.rpartition('.')
	    if not dot:
	        # No separator found, so there is no extension to check.
	        return False
	    return suffix.lower() in ALLOWED_EXTENSIONS

	@spaces.GPU  # NOTE(review): presumably requests a GPU on Hugging Face Spaces -- confirm
	def predict(filename):
	    """Run the module-level layout ``model`` on the image at ``filename``.

	    CUDA is used when torch reports it as available, otherwise the CPU.
	    The chosen device is printed before inference. Returns whatever
	    ``model.predict`` returns.
	    """
	    if torch.cuda.is_available():
	        backend = "cuda"
	    else:
	        backend = "cpu"
	    device = torch.device(backend)
	    print(device)

	    detections = model.predict(
	        filename,       # image to run prediction on
	        imgsz=1024,     # prediction image size
	        conf=0.2,       # confidence threshold
	        device=device,
	    )
	    return detections



	def _plain_text_blocks(image, detection):
	    """Run ``doctr_model`` on every "plain text" region of ``image``.

	    ``detection`` is one layout result; its ``names`` mapping, ``boxes.cls``
	    and ``boxes.xyxy`` are read. ``image`` is the array returned by
	    ``cv2.imread`` for the same picture.

	    Returns a dict keyed ``"block<i>"``, where ``i`` is the region's index
	    among *all* detected regions (not only the plain-text ones), holding
	    ``result[0]["words"].tolist()`` from the doctr predictor.
	    """
	    names = detection.names
	    labels = [names[int(c)] for c in detection.boxes.cls]
	    boxes = [b.tolist() for b in detection.boxes.xyxy]

	    blocks = {}
	    for i, (label, coords) in enumerate(zip(labels, boxes)):
	        if label != "plain text":
	            continue
	        x, y, x1, y1 = (int(v) for v in coords)
	        crop = image[y:y1, x:x1]
	        if crop.size == 0:
	            # Truncating the box to ints can leave a zero-area crop; there
	            # is nothing for the detector to look at, so skip it.
	            continue
	        result = doctr_model([crop])
	        # NOTE(review): "words" is presumably an array of word boxes --
	        # verify against the doctr detection_predictor output format.
	        blocks["block" + str(i)] = result[0]["words"].tolist()
	    return blocks


	@app.route("/", methods=['GET', 'POST'])
	def doc_layout():
	    """Serve the upload form and process uploaded page images.

	    GET (or a POST whose file has a disallowed extension) returns a small
	    HTML upload form. A POST with an accepted file saves the upload into a
	    private temporary directory, runs ``predict`` on it, and returns a JSON
	    string mapping ``"block<i>"`` to the doctr output for each region
	    labelled "plain text". Missing, empty or undecodable uploads flash a
	    message and redirect back to the same URL.
	    """
	    # Function-scope import keeps this block self-contained.
	    import tempfile

	    if request.method == 'POST':
	        if 'file' not in request.files:
	            flash('no file part')
	            return redirect(request.url)
	        file = request.files['file']
	        if file.filename == '':
	            flash('No selected file')
	            return redirect(request.url)
	        if file and allowed_file(file.filename):
	            # A per-request directory under /tmp keeps same-named concurrent
	            # uploads from overwriting each other and is removed on exit,
	            # so uploads no longer accumulate on disk.
	            with tempfile.TemporaryDirectory(dir="/tmp") as workdir:
	                filename = os.path.join(workdir, secure_filename(file.filename))
	                file.save(filename)

	                # Decode before inference: cv2.imread returns None for
	                # files it cannot read, which would otherwise crash when
	                # the image is sliced.
	                pimg = cv2.imread(filename)
	                if pimg is None:
	                    flash('Could not read image')
	                    return redirect(request.url)

	                det_res = predict(filename)
	                blocks = _plain_text_blocks(pimg, det_res[0])

	            return json.dumps(blocks)
	    return '''
	<!doctype html>
	<title>Upload new File</title>
	<h1>Upload new File</h1>
	<form method=post enctype=multipart/form-data>
	<input type=file name=file>
	<input type=submit value=Upload>
	</form>
	'''


	# Start Flask's built-in server only when this file is executed directly
	# (not when it is imported); it binds to 0.0.0.0 on port 8080.
	if __name__ == "__main__":
	    app.run(port=8080, host='0.0.0.0')