import base64
import io
from functools import lru_cache
from pickle import load

import numpy as np
import pytesseract
from flask import Flask, request, jsonify
from keras.applications.xception import Xception
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from PIL import Image
# Flask application object; the route decorator in this module registers on it.
app = Flask(__name__)
def format_tesseract_output(output_text):
    """Tidy raw OCR text into trimmed, newline-terminated lines.

    The whole text and then each individual line are stripped of surrounding
    whitespace, lines that end up empty are dropped, and every remaining line
    is followed by a single newline character.

    Args:
        output_text: Text as returned by the OCR engine.

    Returns:
        The cleaned text, or an empty string when no non-blank line remains.
    """
    trimmed_lines = (raw.strip() for raw in output_text.strip().split("\n"))
    return "".join(f"{text}\n" for text in trimmed_lines if text)
def extract_features(image_data, model):
    """Decode image bytes and run them through a feature-extraction model.

    The image is converted to 3-channel RGB, resized to 299x299, scaled from
    [0, 255] to [-1, 1] and given a leading batch axis before being passed to
    ``model.predict``.

    Args:
        image_data: Encoded image file contents as bytes.
        model: Object exposing ``predict(batch)``.

    Returns:
        Whatever ``model.predict`` returns for the single-image batch, or
        ``None`` when the bytes cannot be decoded as an image.
    """
    try:
        with Image.open(io.BytesIO(image_data)) as raw_image:
            # convert() forces the (otherwise lazy) decode, so truncated or
            # corrupt files fail here rather than later in resize().  It also
            # normalises grayscale, palette and alpha images to exactly three
            # channels; indexing shape[2] on a 2-D grayscale array would
            # raise IndexError.
            image = raw_image.convert("RGB")
    except Exception:
        # Best-effort decode: Pillow raises several unrelated exception types
        # for malformed input, and callers only need to know it failed.
        return None

    image = image.resize((299, 299))
    pixels = np.expand_dims(np.array(image), axis=0)
    # Map 0..255 onto -1..1.
    pixels = pixels / 127.5 - 1.0
    return model.predict(pixels)
def word_for_id(integer, tokenizer):
    """Return the word that ``tokenizer`` maps to index ``integer``.

    ``tokenizer.word_index`` (word -> index) is scanned in iteration order and
    the first word whose index equals ``integer`` is returned.

    Args:
        integer: Vocabulary index to look up.
        tokenizer: Object exposing a ``word_index`` mapping.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    matches = (
        token
        for token, token_id in tokenizer.word_index.items()
        if token_id == integer
    )
    return next(matches, None)
def generate_desc(model, tokenizer, photo, max_length):
    """Greedily decode a caption for ``photo`` one word at a time.

    Decoding is seeded with the token ``'start'``.  At each step the text so
    far is tokenised, padded to ``max_length`` and fed to ``model.predict``
    together with ``photo``; the highest-scoring index is mapped back to a
    word.  Decoding stops after ``max_length`` steps, when the index has no
    word, or when the word ``'end'`` is predicted.

    Args:
        model: Captioning model exposing ``predict([photo, sequence], verbose=0)``.
        tokenizer: Tokenizer exposing ``texts_to_sequences`` and ``word_index``.
        photo: Image features passed as the first model input.
        max_length: Padded sequence length and maximum number of decode steps.

    Returns:
        The predicted words joined by single spaces, without the seed token;
        an empty string when no word was produced.
    """
    seed = 'start'
    # Predicted words are kept apart from the seed so it never has to be
    # stripped from the result.  Stripping it with str.replace would return
    # the bare seed for an empty caption and would also delete 'start '
    # inside words such as 'restart '.
    words = []
    for _ in range(max_length):
        in_text = ' '.join([seed, *words])
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        pred = model.predict([photo, sequence], verbose=0)
        word = word_for_id(np.argmax(pred), tokenizer)
        if word is None or word == 'end':
            break
        words.append(word)
    return ' '.join(words)
@lru_cache(maxsize=1)
def _load_caption_models():
    """Load the tokenizer and both Keras models once and reuse them.

    Returns:
        A ``(tokenizer, model, xception_model, max_length)`` tuple.
    """
    # NOTE(security): pickle executes arbitrary code on load; this is only
    # acceptable because tokenizer.p is a local file shipped with the service.
    with open("tokenizer.p", "rb") as tokenizer_file:
        tokenizer = load(tokenizer_file)
    model = load_model('model.keras')
    xception_model = Xception(include_top=False, pooling="avg")
    # generate_desc calls model.predict([photo, sequence]), so the second
    # model input is the padded token sequence; its fixed length is the
    # caption length to decode with.
    # NOTE(review): assumes that input has a static length - confirm against
    # the saved model.
    max_length = int(model.inputs[1].shape[1])
    return tokenizer, model, xception_model, max_length


@app.route('/api', methods=['POST'])
def generate_caption():
    """Return OCR text for the uploaded image, or a generated caption.

    Expects a POST form field ``image`` holding a base64-encoded image file.
    The image is first run through Tesseract; if any non-blank text is found
    it is returned cleaned up, otherwise a caption is generated from the
    image features.

    Returns:
        ``({'hasText': bool, 'result': str}, 200)`` on success,
        ``({'error': str}, 400)`` when the field is missing or the payload is
        not a decodable image, and ``({'error': str}, 500)`` for any other
        failure.
    """
    base64_image_data = request.form.get('image')
    if base64_image_data is None:
        return jsonify({'error': "Missing 'image' form field"}), 400

    # Malformed input is the client's fault: answer 400 rather than 500.
    try:
        image_data = base64.b64decode(base64_image_data)
        pil_image = Image.open(io.BytesIO(image_data))
    except (ValueError, OSError):
        # binascii.Error is a ValueError; UnidentifiedImageError is an OSError.
        return jsonify({'error': 'Invalid image data'}), 400

    try:
        extracted_text = pytesseract.image_to_string(pil_image, lang="eng+chi_sim+msa")
        has_text = bool(extracted_text.strip())
        if has_text:
            result = format_tesseract_output(extracted_text)
        else:
            tokenizer, model, xception_model, max_length = _load_caption_models()
            photo = extract_features(image_data, xception_model)
            if photo is None:
                return jsonify({'error': 'Failed to extract features from the image'}), 400
            result = generate_desc(model, tokenizer, photo, max_length)
        return jsonify({'hasText': has_text, 'result': result}), 200
    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure.
        app.logger.exception("Caption generation failed")
        return jsonify({'error': str(e)}), 500
# Run the application only when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()