import io
import os
from flask import Flask, request, jsonify
import base64
import pytesseract
import numpy as np
from pickle import load
from PIL import Image
from keras.applications.xception import Xception  # pre-trained Xception model for image features
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
# Print environment details for debugging (Debian release, OS banner, available Tesseract packages)
print(os.popen('cat /etc/debian_version').read())
print(os.popen('cat /etc/issue').read())
print(os.popen('apt search tesseract').read())
app = Flask(__name__)
MAX_LENGTH = 38
# Set the TESSDATA_PREFIX environment variable
# os.environ['TESSDATA_PREFIX'] = '/cache/huggingface/downloads/tesseract-ocr/4.00/tessdata'
# Set the path to the Tesseract executable
# pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
def format_tesseract_output(output_text):
    formatted_text = ""
    lines = output_text.strip().split("\n")
    for line in lines:
        line = line.strip()
        if line:
            formatted_text += line + "\n"
    return formatted_text
def extract_features(image_data, model):
    # Load the raw image bytes and compute Xception features for the captioning model
    try:
        image = Image.open(io.BytesIO(image_data))
    except Exception:
        return None
    image = image.resize((299, 299))
    image = np.array(image)
    # drop the alpha channel if the image has 4 channels (RGBA)
    if image.shape[2] == 4:
        image = image[..., :3]
    image = np.expand_dims(image, axis=0)
    # scale pixel values to [-1, 1], as expected by Xception
    image = image / 127.5
    image = image - 1.0
    feature = model.predict(image)
    return feature
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
def generate_desc(model, tokenizer, photo, max_length):
    # Greedily generate a caption word by word, starting from the 'start' token
    in_text = 'start'
    for i in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        pred = model.predict([photo, sequence], verbose=0)
        pred = np.argmax(pred)
        word = word_for_id(pred, tokenizer)
        if word is None or word == 'end':
            break
        in_text += ' ' + word
    return in_text.replace('start ', '')
# API endpoint to receive an image and return OCR text or a generated caption
@app.route('/api', methods=['POST'])
def generate_caption():
    try:
        base64_image_data = request.form['image']
        # Decode the Base64 string into binary image data
        image_data = base64.b64decode(base64_image_data)
        # Convert the image data to a PIL image object and run OCR on it
        pil_image = Image.open(io.BytesIO(image_data))
        extracted_text = pytesseract.image_to_string(pil_image)
        hasText = bool(extracted_text.strip())
        if hasText:
            result = format_tesseract_output(extracted_text)
        else:
            # No readable text: fall back to the image-captioning model
            tokenizer = load(open("tokenizer.p", "rb"))
            model = load_model('model_9.keras')
            xception_model = Xception(include_top=False, pooling="avg")
            photo = extract_features(image_data, xception_model)
            if photo is None:
                return jsonify({'error': 'Failed to extract features from the image'}), 400
            result = generate_desc(model, tokenizer, photo, MAX_LENGTH)
        return jsonify({'hasText': hasText, 'result': result}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500


if __name__ == '__main__':
    app.run()
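
# Example client call (a minimal sketch, not part of the app): POST a Base64-encoded
# image to the /api endpoint as the 'image' form field and print the JSON response.
# Assumes the server is running locally on Flask's default port and that a file
# named "example.jpg" exists; both are placeholders.
#
#   import base64
#   import requests
#
#   with open("example.jpg", "rb") as f:
#       encoded = base64.b64encode(f.read()).decode("utf-8")
#   response = requests.post("http://127.0.0.1:5000/api", data={"image": encoded})
#   print(response.json())  # e.g. {"hasText": false, "result": "<generated caption>"}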