"""Gradio demo that reads a captcha image with a pre-trained CTC OCR model."""

import os

# The backend has to be selected before Keras is imported.
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import numpy as np
from PIL import Image
import gradio as gr
import tensorflow as tf
from keras import layers
from pathlib import Path
from collections import Counter


def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Compute the CTC loss for every element of a batch.

    Args:
        y_true: Dense label tensor; converted to a sparse tensor internally.
        y_pred: Per-timestep class probabilities. It is transposed with
            ``perm=[1, 0, 2]`` before being handed to ``ctc_loss``.
        input_length: Tensor whose last axis is squeezed away to obtain the
            sequence length of each prediction.
        label_length: Tensor whose last axis is squeezed away to obtain the
            length of each label sequence.

    Returns:
        The loss returned by ``tf.compat.v1.nn.ctc_loss`` with an extra axis
        inserted at position 1.
    """
    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
    sparse_labels = tf.cast(
        ctc_label_dense_to_sparse(y_true, label_length), tf.int32
    )

    # ctc_loss works on log-probabilities; epsilon keeps log() away from zero.
    y_pred = tf.math.log(
        tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon()
    )

    return tf.expand_dims(
        tf.compat.v1.nn.ctc_loss(
            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
        ),
        1,
    )


def ctc_label_dense_to_sparse(labels, label_lengths):
    """Convert a dense 2-D label tensor into a ``tf.SparseTensor``.

    Only the first ``label_lengths[i]`` entries of row ``i`` are kept.

    Args:
        labels: Rank-2 tensor of labels (indexed as ``[batch, position]``).
        label_lengths: One length per row of ``labels``.

    Returns:
        A ``tf.SparseTensor`` with int64 indices, the gathered label values and
        a dense shape equal to the shape of ``labels``.
    """
    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(old_input, current_input):
        # Boolean row that is True for positions below the current length.
        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
            max_num_labels_tns, current_input
        )

    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
    dense_mask = tf.compat.v1.scan(
        range_less_than, label_lengths, initializer=init, parallel_iterations=1
    )
    dense_mask = dense_mask[:, 0, :]

    # Column index of every kept label.
    label_array = tf.reshape(
        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
    )
    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)

    # Row (batch) index of every kept label.
    batch_array = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
            tf.reverse(label_shape, [0]),
        )
    )
    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(
        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
    )
    vals_sparse = tf.compat.v1.gather_nd(labels, indices)

    return tf.SparseTensor(
        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
    )


class CTCLayer(layers.Layer):
    """Layer that attaches the CTC loss to the model and passes predictions on."""

    def __init__(self, name=None, **kwargs):
        # **kwargs lets Keras rebuild the layer from a saved config, which may
        # carry base-layer entries such as ``trainable`` and ``dtype``.
        super().__init__(name=name, **kwargs)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        """Add the CTC loss for ``(y_true, y_pred)`` and return ``y_pred``."""
        # Compute the training-time loss value and add it to the layer using
        # `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # Every sample in the batch uses the full prediction/label length.
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred


loaded_model = keras.models.load_model(
    "ocr_model_pred.h5", custom_objects={"CTCLayer": CTCLayer}
)
loaded_model.load_weights("ocr_model_pred_weights.h5")

# Decoded sequences are truncated to this many characters.
max_len = 5

characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

# Mapping characters to integers
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to original characters
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)


def decode_batch_predictions(pred):
    """Greedy-decode a batch of model outputs into strings.

    Args:
        pred: Array of model outputs; ``pred.shape[0]`` is used as the batch
            size and ``pred.shape[1]`` as the sequence length of each sample.

    Returns:
        A list with one decoded string per batch element, each limited to
        ``max_len`` characters.
    """
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search.
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_len
    ]
    # Iterate over the results and get back the text.
    output_text = []
    for res in results:
        # -1 marks padding in the decoded sequence; drop it before the lookup.
        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text


def preprocess_image(image):
    """Turn a PIL image into a single-sample batch for ``loaded_model``.

    NOTE(review): assumes the model has one image input shaped
    ``(batch, width, height, 1)`` with pixel values scaled to ``[0, 1]``,
    i.e. the image is resized and then transposed so that width is the time
    axis. Confirm against the preprocessing used at training time.
    """
    _, img_width, img_height, _ = loaded_model.input_shape
    pixels = np.asarray(image.convert("L"), dtype="float32") / 255.0
    img = tf.convert_to_tensor(pixels)[..., tf.newaxis]
    img = tf.image.resize(img, [img_height, img_width])
    img = tf.transpose(img, perm=[1, 0, 2])
    return np.expand_dims(img.numpy(), axis=0)


def predict_captcha(image):
    """Gradio callback: return the text recognised in ``image``.

    Args:
        image: ``PIL.Image`` supplied by the Gradio image component, or
            ``None`` when the form is submitted without an image.

    Returns:
        The decoded captcha text, or an empty string if no image was given.
    """
    if image is None:
        return ""
    preds = loaded_model.predict(preprocess_image(image))
    return decode_batch_predictions(preds)[0]


# The interface callback receives the raw uploaded image, so it must run the
# model itself; decode_batch_predictions only understands model outputs.
interface = gr.Interface(
    fn=predict_captcha,
    inputs=gr.Image(label="Input image", type="pil"),
    outputs='text',
    title='Captcha Recognition',
    theme='darkhuggingface',
)
interface.launch(inline=False)