File size: 3,993 Bytes
bbbfa2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e302e06
93c87da
0bd0f00
bbbfa2a
93c87da
 
0bd0f00
44154a0
93c87da
44154a0
0bd0f00
 
93c87da
bbbfa2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93c87da
 
 
44154a0
 
bbbfa2a
 
93c87da
44154a0
 
 
 
bbbfa2a
 
 
 
 
 
44154a0
bbbfa2a
 
 
44154a0
bbbfa2a
 
 
 
 
 
 
 
 
 
44154a0
bbbfa2a
 
 
 
 
 
 
 
44154a0
e302e06
 
 
 
93c87da
bbbfa2a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# from flask import Flask, render_template, request, send_from_directory
# from paddleocr import PaddleOCR
# import os

# app = Flask(__name__)

# # Upload folder
# UPLOAD_FOLDER = 'uploads'
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# if not os.path.exists(UPLOAD_FOLDER):
#     os.makedirs(UPLOAD_FOLDER)

# # Initialize OCR
# ocr = PaddleOCR(use_angle_cls=True, lang='en')

# @app.route('/', methods=['GET', 'POST'])
# def upload_file():
#     text = None
#     filename = None
#     if request.method == 'POST':
#         file = request.files.get('file')
#         if not file or file.filename == '':
#             return render_template('index.html', error="No file selected")

#         filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
#         file.save(filepath)

#         # Run OCR
#         result = ocr.ocr(filepath, cls=True)
#         extracted_text = ""
#         for line in result:
#             for word_info in line:
#                 extracted_text += word_info[1][0] + " "

#         text = extracted_text
#         filename = file.filename

#     return render_template('index.html', text=text, filename=filename)

# @app.route('/uploads/<filename>')
# def uploaded_file(filename):
#     return send_from_directory(app.config['UPLOAD_FOLDER'], filename)

# if __name__ == '__main__':
#     port = int(os.environ.get('PORT', 5000))  # <-- IMPORTANT
#     app.run(host='0.0.0.0', port=port)

from flask import Flask, render_template, request, send_from_directory
from paddleocr import PaddleOCR
import os
import time

app = Flask(__name__)

# Upload folder: uploaded images are written here and served back from here.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# exist_ok=True makes creation idempotent and avoids the check-then-create
# race when several workers import this module at the same time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Build the shared OCR engine once at import time. `ocr` stays None when
# construction fails, so the app can still start and callers can check it.
ocr = None
try:
    ocr = PaddleOCR(
        # Recognition language and angle classification
        lang='en',
        use_angle_cls=True,
        # CPU-only execution with MKL-DNN enabled
        use_gpu=False,
        enable_mkldnn=True,
        # Model directories, relative to the working directory
        det_model_dir='paddle_models/det',
        rec_model_dir='paddle_models/rec',
        cls_model_dir='paddle_models/cls',
        # NOTE(review): intended to cap worker threads; confirm PaddleOCR
        # honors `thread_num` (its documented CPU option may be `cpu_threads`).
        thread_num=2,
    )
except Exception as init_error:
    # Best-effort: report the failure and keep running without OCR.
    print(f"OCR initialization failed: {str(init_error)}")

def _safe_upload_name(original_name, timestamp):
    """Return a flat, timestamp-prefixed file name for an uploaded file.

    Only the last path component of the client-supplied name is kept, so the
    result never contains a directory separator and always lands directly
    inside the upload folder.
    """
    # Clients may send a full path; treat backslashes as separators too.
    base = os.path.basename(original_name.replace('\\', '/'))
    # A name that ends in a separator leaves nothing behind; use a placeholder.
    return f"{timestamp}_{base or 'upload'}"


def _extract_text(result):
    """Join the recognized strings from an `ocr.ocr(...)` result with spaces."""
    if not result:
        return ""
    # result[0] holds the detections for the image; it can be None when no
    # text was found, which must yield empty text rather than a TypeError.
    detections = result[0] or []
    pieces = [
        entry[1][0]
        for entry in detections
        if entry and len(entry) >= 2  # skip entries without text information
    ]
    return " ".join(pieces).strip()


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Render the upload page and, on POST, run OCR on the uploaded file.

    GET renders `index.html` with no results. POST saves the uploaded file
    under the upload folder with a timestamp prefix, runs OCR on it, and
    renders `index.html` with the extracted `text` and stored `filename`.
    Any failure while saving or recognizing is reported through the
    template's `error` value instead of propagating.
    """
    text = None
    filename = None
    error = None

    if request.method == 'POST':
        file = request.files.get('file')
        if not file or file.filename == '':
            return render_template('index.html', error="No file selected")

        try:
            # Fail before touching the disk so a missing OCR engine does not
            # leave orphaned uploads behind.
            if not ocr:
                raise RuntimeError("OCR engine not available")

            # Timestamp prefix prevents overwriting earlier uploads that
            # share the same original name.
            safe_filename = _safe_upload_name(file.filename, int(time.time()))
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename)
            file.save(filepath)

            result = ocr.ocr(filepath, cls=True)

            text = _extract_text(result)
            filename = safe_filename

        except Exception as e:
            # Request boundary: surface the problem to the user and the log
            # rather than returning a 500.
            error = f"Error processing file: {str(e)}"
            print(error)

    return render_template('index.html', text=text, filename=filename, error=error)

@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve the file called `filename` from the configured upload folder."""
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename)

if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 5000 when it is not set.
    app.run(
        host='0.0.0.0',
        port=int(os.environ.get('PORT', 5000)),
    )