|
import io |
|
from flask import Flask, request, jsonify |
|
import base64 |
|
import pytesseract |
|
import numpy as np |
|
from pickle import load |
|
from PIL import Image |
|
from keras.applications.xception import Xception |
|
from keras.models import load_model |
|
from keras.preprocessing.sequence import pad_sequences |
|
|
|
app = Flask(__name__)


# Maximum caption length in tokens: bounds the greedy decoding loop and is the
# pad length for input sequences in generate_desc(). Presumably matches the
# sequence length the captioning model was trained with — TODO confirm.
MAX_LENGTH = 34
|
|
|
def format_tesseract_output(output_text):
    """Normalise raw Tesseract output.

    Keeps only non-blank lines, strips surrounding whitespace from each,
    and returns them newline-terminated (one trailing newline per line).
    """
    stripped = (segment.strip() for segment in output_text.strip().split("\n"))
    return "".join(f"{segment}\n" for segment in stripped if segment)
|
|
|
|
|
def extract_features(image_data, model):
    """Compute an image feature vector for the captioning model.

    Decodes ``image_data`` (raw image bytes), preprocesses it for Xception
    (299x299 RGB, pixels scaled to [-1, 1]) and runs ``model.predict``.

    Returns the model's feature array, or None when the bytes cannot be
    decoded as an image (callers check for None).
    """
    try:
        image = Image.open(io.BytesIO(image_data))
    except Exception:
        return None

    # Convert to RGB before resizing: the original code only special-cased
    # RGBA (slicing off the alpha channel) and crashed with IndexError on
    # grayscale ('L') or palette ('P') images, whose numpy arrays are 2-D
    # and have no shape[2]. convert("RGB") handles all modes uniformly and
    # guarantees the 3 channels Xception expects.
    image = image.convert("RGB").resize((299, 299))

    arr = np.asarray(image, dtype=np.float32)
    arr = np.expand_dims(arr, axis=0)
    # Xception preprocessing: map [0, 255] pixel values to [-1, 1].
    arr = arr / 127.5 - 1.0

    return model.predict(arr)
|
|
|
|
|
def word_for_id(integer, tokenizer):
    """Reverse-map a predicted token index to its word.

    Scans the tokenizer's word_index and returns the word whose index
    equals ``integer``, or None when no word has that index.
    """
    return next(
        (token for token, idx in tokenizer.word_index.items() if idx == integer),
        None,
    )
|
|
|
|
|
def generate_desc(model, tokenizer, photo, max_length):
    """Greedily decode a caption for ``photo``.

    Starts from the 'start' token and repeatedly feeds the photo features
    plus the padded token sequence to ``model``, appending the argmax word
    each step. Stops at the 'end' token, an unknown index, or after
    ``max_length`` words.

    Returns the caption text with the leading 'start ' token removed.
    """
    in_text = 'start'
    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        pred = model.predict([photo, sequence], verbose=0)
        word = word_for_id(np.argmax(pred), tokenizer)
        if word is None or word == 'end':
            break
        in_text += ' ' + word
    # Strip only the LEADING 'start ' token. The original
    # in_text.replace('start ', '') removed every occurrence, which would
    # mangle a caption legitimately containing the word 'start'.
    if in_text.startswith('start '):
        return in_text[len('start '):]
    return in_text
|
|
|
|
|
|
|
def _load_caption_models():
    """Lazily load and cache the tokenizer and both Keras models.

    The originals were re-loaded on every request — including building the
    full Xception backbone — which is very expensive. Caching on the
    function object keeps everything inside this block and loads once.
    Returns (tokenizer, caption_model, xception_model).
    """
    cache = _load_caption_models.__dict__
    if 'models' not in cache:
        # NOTE(security): pickle of a local artifact we ship ourselves;
        # never point this at an untrusted file.
        with open('tokenizer.p', 'rb') as fh:  # was leaked: open() had no close
            tokenizer = load(fh)
        caption_model = load_model('model.keras')
        xception_model = Xception(include_top=False, pooling='avg')
        cache['models'] = (tokenizer, caption_model, xception_model)
    return cache['models']


@app.route('/api', methods=['POST'])
def generate_caption():
    """POST /api — OCR the uploaded image, or caption it if it has no text.

    Expects a base64-encoded image in the 'image' form field. Responds with
    JSON {'hasText': bool, 'result': str} on success; 400 for a missing
    field or undecodable image, 500 for unexpected failures.
    """
    try:
        base64_image_data = request.form.get('image')
        if not base64_image_data:
            # Was a KeyError -> 500; a missing field is a client error.
            return jsonify({'error': "Missing 'image' form field"}), 400

        image_data = base64.b64decode(base64_image_data)
        pil_image = Image.open(io.BytesIO(image_data))

        # OCR first: languages are English, Simplified Chinese and Malay.
        extracted_text = pytesseract.image_to_string(pil_image, lang="eng+chi_sim+msa")
        hasText = bool(extracted_text.strip())

        if hasText:
            result = format_tesseract_output(extracted_text)
        else:
            tokenizer, model, xception_model = _load_caption_models()
            photo = extract_features(image_data, xception_model)
            if photo is None:
                return jsonify({'error': 'Failed to extract features from the image'}), 400
            result = generate_desc(model, tokenizer, photo, MAX_LENGTH)

        return jsonify({'hasText': hasText, 'result': result}), 200
    except Exception as e:
        # Top-level boundary: report the failure rather than crash the worker.
        return jsonify({'error': str(e)}), 500
|
|
|
if __name__ == '__main__':
    # Flask development server (defaults: 127.0.0.1:5000). Use a production
    # WSGI server (gunicorn/uwsgi) for deployment.
    app.run()
|
|