gahanmakwana committed on
Commit
7550ca1
·
1 Parent(s): 3d48a10
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -2,6 +2,8 @@ from flask import Flask, render_template, request, redirect, flash, url_for
2
  import os
3
  from werkzeug.utils import secure_filename
4
  from paddleocr import PaddleOCR
 
 
5
 
6
  app = Flask(__name__)
7
  app.secret_key = os.environ.get('SECRET_KEY', 'change-this') # Replace in production
@@ -12,7 +14,20 @@ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
12
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
13
 
14
  # Initialize PaddleOCR once at the start (use CPU mode)
15
- ocr = PaddleOCR(use_angle_cls=False, use_gpu=False, lang='en')
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  @app.route('/', methods=['GET', 'POST'])
18
  def index():
@@ -34,7 +49,10 @@ def index():
34
  file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
35
  file.save(file_path)
36
 
37
- # Run PaddleOCR on the saved image (CPU mode)
 
 
 
38
  result = ocr.ocr(file_path, cls=False)
39
  # Collect recognized text lines
40
  lines = []
@@ -44,6 +62,10 @@ def index():
44
  extracted_text = "\n".join(lines)
45
  image_file = filename
46
 
 
 
 
 
47
  return render_template('index.html', extracted_text=extracted_text, image_file=image_file)
48
 
49
  if __name__ == '__main__':
 
2
  import os
3
  from werkzeug.utils import secure_filename
4
  from paddleocr import PaddleOCR
5
+ from PIL import Image
6
+ import gc
7
 
8
  app = Flask(__name__)
9
  app.secret_key = os.environ.get('SECRET_KEY', 'change-this') # Replace in production
 
14
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
15
 
16
  # Initialize PaddleOCR once at the start (use CPU mode)
17
# Options for the single, module-wide OCR engine: English models, GPU
# disabled, angle classification off, and the detection / recognition /
# classification model directories placed under /tmp/ocr_models.
_OCR_OPTIONS = {
    'use_angle_cls': False,
    'use_gpu': False,
    'lang': 'en',
    'det_model_dir': '/tmp/ocr_models/det',
    'rec_model_dir': '/tmp/ocr_models/rec',
    'cls_model_dir': '/tmp/ocr_models/cls',
}

# Built once at import time and reused by every request handler.
ocr = PaddleOCR(**_OCR_OPTIONS)
25
+
26
+ # Resize image before processing to reduce memory usage
27
def resize_image(image_path, max_size=(1024, 1024)):
    """Shrink the image stored at ``image_path`` in place to fit ``max_size``.

    The file is only rewritten when the image is actually larger than the
    bounding box. An image that already fits is left untouched on disk,
    which avoids a pointless (and, for lossy formats, degrading) re-encode.

    Args:
        image_path: Path of the image file to shrink; it is overwritten
            when a resize happens.
        max_size: ``(max_width, max_height)`` bounding box in pixels.
            Defaults to 1024x1024, the value previously hard-coded.
    """
    max_width, max_height = max_size
    with Image.open(image_path) as img:
        # Already within bounds: thumbnail() would be a no-op, so skip the
        # save as well and keep the original bytes intact.
        if img.width <= max_width and img.height <= max_height:
            return

        # NOTE(review): re-saving presumably drops EXIF metadata, including
        # the orientation tag; if the OCR step relies on it, consider
        # applying the orientation before saving -- confirm.
        img.thumbnail((max_width, max_height))
        img.save(image_path)
31
 
32
  @app.route('/', methods=['GET', 'POST'])
33
  def index():
 
49
  file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
50
  file.save(file_path)
51
 
52
+ # Resize the image to optimize memory usage
53
+ resize_image(file_path)
54
+
55
+ # Run PaddleOCR on the resized image (CPU mode)
56
  result = ocr.ocr(file_path, cls=False)
57
  # Collect recognized text lines
58
  lines = []
 
62
  extracted_text = "\n".join(lines)
63
  image_file = filename
64
 
65
+ # Clear memory after processing
66
+ del result
67
+ gc.collect()
68
+
69
  return render_template('index.html', extracted_text=extracted_text, image_file=image_file)
70
 
71
  if __name__ == '__main__':