Spaces:

kai-sheng
/

text-extraction-and-image-captioning

Sleeping

kai-sheng committed on Apr 15, 2024

Commit

8384356

verified ·

1 Parent(s): d7b2ea0

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -3,9 +3,10 @@ from flask import Flask, request, jsonify
 import base64
 import pytesseract
 import numpy as np
 from pickle import load
 from PIL import Image
-from keras.applications.xception import Xception #to get pre-trained model Xception
 from keras.models import load_model
 from keras.preprocessing.sequence import pad_sequences
@@ -13,6 +14,17 @@ app = Flask(__name__)
 MAX_LENGTH = 38
 def format_tesseract_output(output_text):
     formatted_text = ""
     lines = output_text.strip().split("\n")
@@ -34,7 +46,7 @@ def extract_features(image_data, model):
     image = image.resize((299,299))
     image = np.array(image)
-    # for 4 channels images, we need to convert them into 3 channels
     if image.shape[2] == 4:
         image = image[..., :3]
@@ -77,7 +89,7 @@ def generate_caption():
         image_data = base64.b64decode(base64_image_data)
         # Convert the image data to a PIL image object
-        pil_image = Image.open(io.BytesIO(img_path))
         extracted_text = pytesseract.image_to_string(pil_image, lang="eng+chi_sim+msa")
         hasText = bool(extracted_text.strip())

 import base64
 import pytesseract
 import numpy as np
+import tensorflow as tf
 from pickle import load
 from PIL import Image
+from keras.applications.xception import Xception # to get pre-trained model Xception
 from keras.models import load_model
 from keras.preprocessing.sequence import pad_sequences
 MAX_LENGTH = 38
+# Set up GPU memory growth
+physical_devices = tf.config.list_physical_devices('GPU')
+if physical_devices:
+    try:
+        # Allow memory growth for all GPUs
+        for gpu in physical_devices:
+            tf.config.experimental.set_memory_growth(gpu, True)
+        print("GPU(s) memory growth set to True")
+    except RuntimeError as e:
+        print(e)
 def format_tesseract_output(output_text):
     formatted_text = ""
     lines = output_text.strip().split("\n")
     image = image.resize((299,299))
     image = np.array(image)
+    # convert 4 channels image into 3 channels
     if image.shape[2] == 4:
         image = image[..., :3]
         image_data = base64.b64decode(base64_image_data)
         # Convert the image data to a PIL image object
+        pil_image = Image.open(io.BytesIO(image_data))
         extracted_text = pytesseract.image_to_string(pil_image, lang="eng+chi_sim+msa")
         hasText = bool(extracted_text.strip())