gahanmakwana committed on
Commit
2804140
·
1 Parent(s): bbbfa2a

fix: add werkzeug dependency

Browse files
Files changed (2) hide show
  1. app.py +35 -44
  2. requirements.txt +1 -13
app.py CHANGED
@@ -49,77 +49,68 @@ from flask import Flask, render_template, request, send_from_directory
49
  from paddleocr import PaddleOCR
50
  import os
51
  import time
 
52
 
53
  app = Flask(__name__)
54
 
 
 
 
55
  # Upload folder
56
  UPLOAD_FOLDER = 'uploads'
57
- app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
58
- if not os.path.exists(UPLOAD_FOLDER):
59
- os.makedirs(UPLOAD_FOLDER)
60
-
61
- # Initialize OCR with error handling and optimized settings
62
- try:
63
- ocr = PaddleOCR(
64
- use_angle_cls=True,
65
  lang='en',
66
- use_gpu=False, # Disable GPU on Render
67
- rec_model_dir='paddle_models/rec', # Cache models
68
- det_model_dir='paddle_models/det',
69
- cls_model_dir='paddle_models/cls',
70
  enable_mkldnn=True, # CPU optimization
71
- thread_num=2 # Limit threads to prevent OOM
 
 
72
  )
73
- except Exception as e:
74
- print(f"OCR initialization failed: {str(e)}")
75
- ocr = None
76
 
77
  @app.route('/', methods=['GET', 'POST'])
78
  def upload_file():
79
- text = None
80
- filename = None
81
- error = None
82
-
83
  if request.method == 'POST':
84
  file = request.files.get('file')
85
  if not file or file.filename == '':
86
  return render_template('index.html', error="No file selected")
87
 
88
  try:
89
- # Save file with timestamp to prevent overwrites
90
- timestamp = str(int(time.time()))
91
- safe_filename = f"{timestamp}_{file.filename}"
92
- filepath = os.path.join(app.config['UPLOAD_FOLDER'], safe_filename)
 
 
 
 
 
93
  file.save(filepath)
94
 
95
- # Check OCR initialization
96
- if not ocr:
97
- raise Exception("OCR engine not available")
98
-
99
- # Run OCR with timeout safeguard
100
- start_time = time.time()
101
- result = ocr.ocr(filepath, cls=True)
102
 
103
- # Process results
104
- extracted_text = ""
105
- if result and len(result) > 0:
106
- for line in result[0]: # Note: result[0] contains the actual OCR data
107
- if line and len(line) >= 2: # Check if line has text information
108
- extracted_text += line[1][0] + " "
109
 
110
- text = extracted_text.strip()
111
- filename = safe_filename
112
 
113
  except Exception as e:
114
- error = f"Error processing file: {str(e)}"
115
- print(error)
 
116
 
117
- return render_template('index.html', text=text, filename=filename, error=error)
118
 
119
  @app.route('/uploads/<filename>')
120
  def uploaded_file(filename):
121
- return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
122
 
123
  if __name__ == '__main__':
124
  port = int(os.environ.get('PORT', 5000))
125
- app.run(host='0.0.0.0', port=port)
 
49
  from paddleocr import PaddleOCR
50
  import os
51
  import time
52
+ import logging
53
 
54
# Flask application object that the route decorators below attach to
app = Flask(__name__)

# Keep log output minimal: only WARNING and above are emitted
logging.basicConfig(level=logging.WARNING)

# Directory where uploaded images are stored; created at import time
# if it does not exist yet
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
62
+
63
+ # Lightweight OCR initialization
64
def get_ocr():
    """Return a newly constructed PaddleOCR engine.

    The engine is configured for English, CPU-only operation with the
    angle classifier turned off and conservative batch/size/thread
    settings. A new instance is built on every call.
    """
    settings = {
        'lang': 'en',
        'use_angle_cls': False,     # Disable angle classifier to save memory
        'use_gpu': False,
        'enable_mkldnn': True,      # CPU optimization
        'rec_batch_num': 1,         # Process one line at a time
        'det_limit_side_len': 480,  # Smaller image size
        'thread_num': 1,            # Critical for free tier
    }
    return PaddleOCR(**settings)
 
 
 
74
 
75
@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Render the upload form and, on POST, run OCR on the uploaded image.

    GET renders ``index.html`` with no context.

    POST expects a multipart field named ``file``. The upload is rejected
    when it is missing, has an empty name, or exceeds 500000 bytes.
    Otherwise it is saved under ``UPLOAD_FOLDER`` with a timestamp prefix,
    passed through a freshly built OCR engine, and ``index.html`` is
    rendered with the extracted ``text`` and the stored ``filename``.

    Any exception raised while handling the upload is logged, the saved
    file (if any) is removed, and ``index.html`` is rendered with an
    ``error`` message.
    """
    if request.method != 'POST':
        return render_template('index.html')

    file = request.files.get('file')
    if not file or file.filename == '':
        return render_template('index.html', error="No file selected")

    # Bound before the try block so the except handler can always inspect
    # it, even when the failure happens before the file is saved.
    filepath = None
    try:
        # Verify file size (<500KB)
        file.seek(0, os.SEEK_END)
        if file.tell() > 500000:
            return render_template('index.html', error="File too large (max 500KB)")
        file.seek(0)

        # The client controls file.filename; keep only its final path
        # component (handling both / and \ separators) so the upload
        # cannot be written outside UPLOAD_FOLDER.
        original_name = os.path.basename(file.filename.replace('\\', '/'))
        if not original_name:
            return render_template('index.html', error="Invalid file name")

        # Save with timestamp
        filename = f"{int(time.time())}_{original_name}"
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        # Initialize OCR per-request (avoids memory buildup)
        ocr = get_ocr()

        # The OCR call returns one entry per input image. That entry may be
        # None or empty when no text is detected, which is treated here as
        # "no text" rather than as an error.
        result = ocr.ocr(filepath, cls=False)
        detections = (result[0] if result else None) or []
        text = ' '.join(word[1][0] for word in detections if len(word) >= 2)

        return render_template('index.html', text=text, filename=filename)

    except Exception as e:
        # Record the traceback; the user only sees the short message below.
        logging.exception("Failed to process uploaded file")
        if filepath and os.path.exists(filepath):
            os.remove(filepath)
        return render_template('index.html', error=f"Error: {str(e)}")
109
 
110
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve the file called ``filename`` from ``UPLOAD_FOLDER``."""
    response = send_from_directory(UPLOAD_FOLDER, filename)
    return response
113
 
114
if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable, falling
    # back to 5000, on all interfaces, with request threading disabled.
    listen_port = int(os.environ.get('PORT', 5000))
    app.run(threaded=False, port=listen_port, host='0.0.0.0')
requirements.txt CHANGED
@@ -19,16 +19,4 @@ flask==2.2.5
19
  paddleocr==2.7.0.3
20
  paddlepaddle==2.6.1
21
  opencv-python-headless==4.8.1.78
22
- shapely==2.0.2
23
- scikit-image==0.22.0
24
- imgaug==0.4.0
25
- pyclipper==1.3.0.post6
26
- lmdb==1.4.1
27
- tqdm==4.66.1
28
- numpy==1.26.0
29
- visualdl==2.5.3
30
- python-Levenshtein==0.23.0
31
- werkzeug==2.2.3
32
- markupsafe==2.1.3
33
- click==8.1.7
34
- blinker==1.7.0
 
19
  paddleocr==2.7.0.3
20
  paddlepaddle==2.6.1
21
  opencv-python-headless==4.8.1.78
22
+ numpy==1.26.0