File size: 3,702 Bytes
bbbfa2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e302e06
93c87da
0bd0f00
bbbfa2a
2804140
93c87da
 
0bd0f00
2804140
 
 
44154a0
93c87da
2804140
 
 
 
 
bbbfa2a
2804140
 
bbbfa2a
2804140
 
 
bbbfa2a
93c87da
 
 
 
44154a0
 
 
 
bbbfa2a
2804140
 
 
 
 
 
 
 
 
bbbfa2a
44154a0
2804140
 
bbbfa2a
2804140
 
 
44154a0
2804140
bbbfa2a
 
2804140
 
 
bbbfa2a
2804140
44154a0
e302e06
 
2804140
e302e06
93c87da
bbbfa2a
2804140
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# from flask import Flask, render_template, request, send_from_directory
# from paddleocr import PaddleOCR
# import os

# app = Flask(__name__)

# # Upload folder
# UPLOAD_FOLDER = 'uploads'
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# if not os.path.exists(UPLOAD_FOLDER):
#     os.makedirs(UPLOAD_FOLDER)

# # Initialize OCR
# ocr = PaddleOCR(use_angle_cls=True, lang='en')

# @app.route('/', methods=['GET', 'POST'])
# def upload_file():
#     text = None
#     filename = None
#     if request.method == 'POST':
#         file = request.files.get('file')
#         if not file or file.filename == '':
#             return render_template('index.html', error="No file selected")

#         filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
#         file.save(filepath)

#         # Run OCR
#         result = ocr.ocr(filepath, cls=True)
#         extracted_text = ""
#         for line in result:
#             for word_info in line:
#                 extracted_text += word_info[1][0] + " "

#         text = extracted_text
#         filename = file.filename

#     return render_template('index.html', text=text, filename=filename)

# @app.route('/uploads/<filename>')
# def uploaded_file(filename):
#     return send_from_directory(app.config['UPLOAD_FOLDER'], filename)

# if __name__ == '__main__':
#     port = int(os.environ.get('PORT', 5000))  # <-- IMPORTANT
#     app.run(host='0.0.0.0', port=port)

from flask import Flask, render_template, request, send_from_directory
from paddleocr import PaddleOCR
import os
import time
import logging

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Configure minimal logging: the root logger only emits WARNING and above.
logging.basicConfig(level=logging.WARNING)

# Upload folder (relative path); created at import time if it is missing.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Lightweight OCR initialization
def get_ocr():
    """Build and return a new PaddleOCR engine with memory-saving settings.

    A fresh engine is constructed on every call; nothing is cached here.

    Returns:
        A ``PaddleOCR`` instance configured for English text on CPU.
    """
    engine_options = {
        'lang': 'en',
        # Skip the angle classifier to save memory.
        'use_angle_cls': False,
        'use_gpu': False,
        # CPU optimization.
        'enable_mkldnn': True,
        # Recognize one text line per batch.
        'rec_batch_num': 1,
        # Use a smaller image size for detection.
        'det_limit_side_len': 480,
        # Marked as critical for the free tier by the original author.
        # NOTE(review): PaddleOCR's CPU thread option appears to be
        # `cpu_threads`; confirm that `thread_num` is actually honored.
        'thread_num': 1,
    }
    return PaddleOCR(**engine_options)

def _safe_upload_name(raw_name):
    """Strip every directory component from a client-supplied file name.

    Args:
        raw_name: The file name exactly as sent by the client.

    Returns:
        Only the final path component; an empty string when nothing is left
        (for example when the name ends with a separator).
    """
    # Treat backslashes as separators too, so Windows-style paths are reduced
    # to their last component regardless of the host operating system.
    return os.path.basename(raw_name.replace('\\', '/'))


def _join_recognized_text(ocr_pages):
    """Join the recognized strings of the first OCR page with single spaces.

    Args:
        ocr_pages: The value returned by ``ocr.ocr(...)``. Its first element
            is expected to be a list of detections (or ``None``), where each
            detection stores the recognized string at ``detection[1][0]``.

    Returns:
        The space-joined text, or an empty string when ``ocr_pages`` is empty
        or its first element is ``None``/empty (no text detected).
    """
    first_page = ocr_pages[0] if ocr_pages else None
    return ' '.join(
        word[1][0] for word in (first_page or []) if len(word) >= 2
    )


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Render the upload page and, on POST, OCR the uploaded image.

    GET renders ``index.html`` without context. POST validates the upload
    (present, at most 500000 bytes), saves it into ``UPLOAD_FOLDER`` under a
    timestamp-prefixed name, runs OCR on it and renders ``index.html`` with
    the extracted ``text`` and the stored ``filename``.

    Returns:
        The rendered ``index.html`` template. On any failure it is rendered
        with an ``error`` message instead, and a partially saved file is
        removed.
    """
    if request.method != 'POST':
        return render_template('index.html')

    file = request.files.get('file')
    if not file or file.filename == '':
        return render_template('index.html', error="No file selected")

    # Bound before the try block so the cleanup in the except handler can
    # never hit an unassigned name when a failure happens before saving.
    filepath = None
    try:
        # Verify file size (<500KB) by seeking to the end of the stream.
        file.seek(0, os.SEEK_END)
        if file.tell() > 500000:
            return render_template('index.html', error="File too large (max 500KB)")
        file.seek(0)

        # Never trust the client-supplied name: keep only its last component
        # so the upload cannot be written outside UPLOAD_FOLDER.
        safe_name = _safe_upload_name(file.filename)
        if not safe_name:
            return render_template('index.html', error="No file selected")

        # Save with a timestamp prefix.
        filename = f"{int(time.time())}_{safe_name}"
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        # Initialize OCR per request (the original author's choice to avoid
        # memory buildup).
        ocr = get_ocr()

        # Only the first page of the result is used; an image without any
        # detected text yields an empty string rather than an error.
        text = _join_recognized_text(ocr.ocr(filepath, cls=False))

        return render_template('index.html', text=text, filename=filename)

    except Exception as e:
        # Top-level request boundary: record the traceback, clean up the
        # saved upload if there is one, and show the message to the user.
        logging.exception("OCR upload failed")
        if filepath is not None and os.path.exists(filepath):
            os.remove(filepath)
        return render_template('index.html', error=f"Error: {str(e)}")

@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve the file called ``filename`` from the upload folder.

    Args:
        filename: Name taken from the ``/uploads/<filename>`` URL segment.

    Returns:
        The response produced by ``send_from_directory``.
    """
    response = send_from_directory(UPLOAD_FOLDER, filename)
    return response

if __name__ == '__main__':
    # Listening port comes from the PORT environment variable (default 5000).
    listen_port = int(os.environ.get('PORT', 5000))
    # Bind on all interfaces with the threaded server mode turned off.
    app.run(host='0.0.0.0', port=listen_port, threaded=False)