Spaces:

kai-sheng
/

text-extraction-and-image-captioning

Sleeping

App Files Files Community

kai-sheng committed on Apr 15, 2024

Commit

d7b2ea0

verified ·

1 Parent(s): b2b9e88

Add text extraction

Browse files

Files changed (1) hide show

main.py +28 -9

main.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import io
 from flask import Flask, request, jsonify
 import base64
 import numpy as np
 from pickle import load
 from PIL import Image
@@ -12,6 +13,16 @@ app = Flask(__name__)
 MAX_LENGTH = 38
 def extract_features(image_data, model):
     try:
         image = Image.open(io.BytesIO(image_data))
@@ -65,19 +76,27 @@ def generate_caption():
         # Decode the Base64 string into binary image data
         image_data = base64.b64decode(base64_image_data)
-        tokenizer = load(open("tokenizer.p","rb"))
-        model = load_model('model_9.keras')
-        xception_model = Xception(include_top=False, pooling="avg")
-        photo = extract_features(image_data, xception_model)
-        if photo is None:
-            return jsonify({'error': 'Failed to extract features from the image'}), 400
-        caption = generate_desc(model, tokenizer, photo, MAX_LENGTH)
-        # Return the generated caption
-        return jsonify({'caption': caption}), 200
     except Exception as e:
         return jsonify({'error': str(e)}), 500

 import io
 from flask import Flask, request, jsonify
 import base64
+import pytesseract
 import numpy as np
 from pickle import load
 from PIL import Image
 MAX_LENGTH = 38
def format_tesseract_output(output_text: str) -> str:
    """Normalize OCR text into newline-terminated, non-blank lines.

    The input is trimmed, split on newline characters, and every line is
    stripped of leading and trailing whitespace. Lines that are empty after
    stripping are dropped.

    Args:
        output_text: The raw text to clean up.

    Returns:
        The surviving lines, each followed by a single newline character,
        or an empty string when no line has visible content.
    """
    stripped_lines = (line.strip() for line in output_text.strip().split("\n"))
    # One join instead of repeated ``+=``: string concatenation in a loop can
    # degrade to quadratic time, while join makes a single pass.
    return "".join(line + "\n" for line in stripped_lines if line)
 def extract_features(image_data, model):
     try:
         image = Image.open(io.BytesIO(image_data))
         # Decode the Base64 string into binary image data
         image_data = base64.b64decode(base64_image_data)
+        # Convert the image data to a PIL image object
+        pil_image = Image.open(io.BytesIO(img_path))
+        extracted_text = pytesseract.image_to_string(pil_image, lang="eng+chi_sim+msa")
+        hasText = bool(extracted_text.strip())
+        if hasText:
+            result = format_tesseract_output(extracted_text)
+        else:
+            tokenizer = load(open("tokenizer.p","rb"))
+            model = load_model('model_9.keras')
+            xception_model = Xception(include_top=False, pooling="avg")
+            photo = extract_features(image_data, xception_model)
+            if photo is None:
+                return jsonify({'error': 'Failed to extract features from the image'}), 400
+            result = generate_desc(model, tokenizer, photo, MAX_LENGTH)
+        return jsonify({'hasText': hasText, 'result': result}), 200
     except Exception as e:
         return jsonify({'error': str(e)}), 500