Spaces:
Running
Running
File size: 3,993 Bytes
bbbfa2a e302e06 93c87da 0bd0f00 bbbfa2a 93c87da 0bd0f00 44154a0 93c87da 44154a0 0bd0f00 93c87da bbbfa2a 93c87da 44154a0 bbbfa2a 93c87da 44154a0 bbbfa2a 44154a0 bbbfa2a 44154a0 bbbfa2a 44154a0 bbbfa2a 44154a0 e302e06 93c87da bbbfa2a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# from flask import Flask, render_template, request, send_from_directory
# from paddleocr import PaddleOCR
# import os
# app = Flask(__name__)
# # Upload folder
# UPLOAD_FOLDER = 'uploads'
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# if not os.path.exists(UPLOAD_FOLDER):
# os.makedirs(UPLOAD_FOLDER)
# # Initialize OCR
# ocr = PaddleOCR(use_angle_cls=True, lang='en')
# @app.route('/', methods=['GET', 'POST'])
# def upload_file():
# text = None
# filename = None
# if request.method == 'POST':
# file = request.files.get('file')
# if not file or file.filename == '':
# return render_template('index.html', error="No file selected")
# filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
# file.save(filepath)
# # Run OCR
# result = ocr.ocr(filepath, cls=True)
# extracted_text = ""
# for line in result:
# for word_info in line:
# extracted_text += word_info[1][0] + " "
# text = extracted_text
# filename = file.filename
# return render_template('index.html', text=text, filename=filename)
# @app.route('/uploads/<filename>')
# def uploaded_file(filename):
# return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
# if __name__ == '__main__':
# port = int(os.environ.get('PORT', 5000)) # <-- IMPORTANT
# app.run(host='0.0.0.0', port=port)
from flask import Flask, render_template, request, send_from_directory
from paddleocr import PaddleOCR
import os
import time
app = Flask(__name__)

# Folder that receives uploaded images; also exposed through app.config so the
# route handlers read the location from a single place.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# exist_ok=True replaces the separate exists() check, which could race when
# several processes import this module at the same time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Build the OCR engine once at import time. If construction fails, the error is
# printed and `ocr` is left as None so the web app can still start.
try:
    ocr = PaddleOCR(
        use_angle_cls=True,
        lang='en',
        use_gpu=False,  # Disable GPU on Render
        rec_model_dir='paddle_models/rec',  # Cache models
        det_model_dir='paddle_models/det',
        cls_model_dir='paddle_models/cls',
        enable_mkldnn=True,  # CPU optimization
        # `cpu_threads` is the option PaddleOCR 2.x reads for the CPU thread
        # count; the previous `thread_num` keyword is not a recognized option.
        cpu_threads=2,  # Limit threads to prevent OOM
    )
except Exception as e:
    print(f"OCR initialization failed: {e}")
    ocr = None
def _extract_text(result):
    """Return the recognized text of the first page of an OCR result.

    Each usable entry is expected to look like ``[box, (text, score)]``; the
    text parts are joined with single spaces. An empty string is returned when
    the result is empty or its first page is empty/None.
    """
    # The first page may be None rather than a list (presumably when nothing
    # was detected), so test its truthiness before iterating over it.
    if not result or not result[0]:
        return ""
    fragments = [
        entry[1][0]
        for entry in result[0]
        if entry and len(entry) >= 2
    ]
    return " ".join(fragments).strip()


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Render the upload page and, on POST, run OCR on the uploaded image.

    GET renders the empty form. POST saves the uploaded file into the upload
    folder under a timestamp-prefixed name, runs OCR on it, and renders the
    extracted text together with the stored filename. Any processing failure
    is reported to the template through ``error`` instead of raising.
    """
    text = None
    filename = None
    error = None
    if request.method == 'POST':
        file = request.files.get('file')
        if not file or file.filename == '':
            return render_template('index.html', error="No file selected")
        try:
            # Fail before writing anything to disk if the engine never loaded.
            if ocr is None:
                raise RuntimeError("OCR engine not available")

            # The filename comes from the client: keep only its final path
            # component (for either separator style) so it cannot point
            # outside the upload folder.
            client_name = os.path.basename(file.filename.replace("\\", "/"))
            client_name = client_name or "upload"

            # Save file with timestamp to prevent overwrites
            timestamp = str(int(time.time()))
            safe_filename = f"{timestamp}_{client_name}"
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename)
            file.save(filepath)

            result = ocr.ocr(filepath, cls=True)
            text = _extract_text(result)
            filename = safe_filename
        except Exception as e:
            # Request boundary: show the failure to the user rather than
            # returning a bare 500 response.
            error = f"Error processing file: {e}"
            print(error)
    return render_template('index.html', text=text, filename=filename, error=error)
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve the file named ``filename`` from the configured upload folder."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename)
if __name__ == '__main__':
    # Read the listening port from the PORT environment variable, falling back
    # to 5000 when it is not set.
    port = int(os.environ.get('PORT', 5000))
    # Listen on all interfaces rather than only on localhost.
    app.run(host='0.0.0.0', port=port)