my-ocr-demo / app.py
gahanmakwana's picture
fix: add werkzeug dependency
2804140
raw
history blame
3.7 kB
# from flask import Flask, render_template, request, send_from_directory
# from paddleocr import PaddleOCR
# import os
# app = Flask(__name__)
# # Upload folder
# UPLOAD_FOLDER = 'uploads'
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# if not os.path.exists(UPLOAD_FOLDER):
# os.makedirs(UPLOAD_FOLDER)
# # Initialize OCR
# ocr = PaddleOCR(use_angle_cls=True, lang='en')
# @app.route('/', methods=['GET', 'POST'])
# def upload_file():
# text = None
# filename = None
# if request.method == 'POST':
# file = request.files.get('file')
# if not file or file.filename == '':
# return render_template('index.html', error="No file selected")
# filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
# file.save(filepath)
# # Run OCR
# result = ocr.ocr(filepath, cls=True)
# extracted_text = ""
# for line in result:
# for word_info in line:
# extracted_text += word_info[1][0] + " "
# text = extracted_text
# filename = file.filename
# return render_template('index.html', text=text, filename=filename)
# @app.route('/uploads/<filename>')
# def uploaded_file(filename):
# return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
# if __name__ == '__main__':
# port = int(os.environ.get('PORT', 5000)) # <-- IMPORTANT
# app.run(host='0.0.0.0', port=port)
from flask import Flask, render_template, request, send_from_directory
from paddleocr import PaddleOCR
import os
import time
import logging
app = Flask(__name__)
# Configure minimal logging: only warnings and above reach the root logger.
logging.basicConfig(level=logging.WARNING)
# Upload folder, anchored at the application root instead of the process
# working directory. Flask's send_from_directory() resolves a relative
# directory against app.root_path, so using an absolute path here guarantees
# that files are saved to and served from the same place.
UPLOAD_FOLDER = os.path.join(app.root_path, 'uploads')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Lightweight OCR initialization
def get_ocr():
    """Build a PaddleOCR engine configured for a small, CPU-only host.

    Returns:
        A new ``PaddleOCR`` instance for English text. A fresh engine is
        created on every call; nothing is cached here.
    """
    return PaddleOCR(
        lang='en',
        use_angle_cls=False,     # Disable angle classifier to save memory
        use_gpu=False,
        enable_mkldnn=True,      # CPU optimization
        rec_batch_num=1,         # Recognition batch size of one
        det_limit_side_len=480,  # Smaller side-length limit for detection
        # PaddleOCR's CPU thread limit is ``cpu_threads``; the previous
        # ``thread_num`` keyword is not a recognised option and was ignored,
        # so the single-thread limit never took effect.
        cpu_threads=1,           # Critical for free tier
    )
@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Render the upload form and, on POST, run OCR on the submitted image.

    GET renders ``index.html`` with no context.

    POST reads the ``file`` form field, rejects missing, unnamed or oversized
    (> 500000 bytes) uploads, stores the file in ``UPLOAD_FOLDER`` under a
    timestamp-prefixed name, runs PaddleOCR on it and renders ``index.html``
    with the recognised ``text`` and the stored ``filename``.

    Returns:
        The rendered ``index.html`` template. On any failure it is rendered
        with an ``error`` message, and a file already written for this
        request is removed.
    """
    if request.method != 'POST':
        return render_template('index.html')

    file = request.files.get('file')
    if not file or file.filename == '':
        return render_template('index.html', error="No file selected")

    # The filename is client-controlled: drop any directory components
    # (POSIX or Windows style) so the upload cannot escape UPLOAD_FOLDER.
    safe_name = os.path.basename(file.filename.replace('\\', '/'))
    if not safe_name:
        return render_template('index.html', error="Invalid file name")

    max_upload_bytes = 500000
    # Bound before the try block so the cleanup handler can always read it,
    # even when the failure happens before the file is written.
    filepath = None
    try:
        # Verify file size (<500KB) by seeking to the end of the stream.
        file.seek(0, os.SEEK_END)
        if file.tell() > max_upload_bytes:
            return render_template('index.html', error="File too large (max 500KB)")
        file.seek(0)

        # Save with a timestamp prefix.
        filename = f"{int(time.time())}_{safe_name}"
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        # Initialize OCR per-request (avoids memory buildup).
        ocr = get_ocr()

        # The result holds one entry per input image; that entry is None
        # when no text was detected, which is treated as "no lines".
        result = ocr.ocr(filepath, cls=False)
        lines = (result[0] if result else None) or []
        text = ' '.join(word[1][0] for word in lines if len(word) >= 2)
        return render_template('index.html', text=text, filename=filename)
    except Exception as e:
        # Top-level request boundary: record the traceback, clean up the
        # stored upload (if any) and show the message to the user.
        logging.exception("OCR request failed")
        if filepath is not None and os.path.exists(filepath):
            os.remove(filepath)
        return render_template('index.html', error=f"Error: {str(e)}")
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve the file called ``filename`` from ``UPLOAD_FOLDER``."""
    response = send_from_directory(UPLOAD_FOLDER, filename)
    return response
if __name__ == '__main__':
    # Listen on all interfaces, on the port given by the PORT environment
    # variable (5000 when unset), with the threaded server mode turned off.
    env_port = os.environ.get('PORT', 5000)
    app.run(host='0.0.0.0', port=int(env_port), threaded=False)