import os
import io

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import numpy as np
from PIL import Image
import gradio as gr
import tensorflow as tf
from keras import layers
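
# Custom CTC helpers: a reimplementation of `keras.backend.ctc_batch_cost`
# on top of `tf.compat.v1.nn.ctc_loss`, needed so that the `CTCLayer`
# defined below can be deserialized from the saved model.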
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)

    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())

    return tf.expand_dims(
        tf.compat.v1.nn.ctc_loss(
            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
        ),
        1,
    )
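
# Shape note on `ctc_batch_cost` above: `tf.compat.v1.nn.ctc_loss` consumes
# time-major inputs, hence the transpose of `y_pred` from
# (batch, time, classes) to (time, batch, classes). Taking the log turns the
# model's softmax probabilities into logits, which the op re-normalizes
# internally.
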
def ctc_label_dense_to_sparse(labels, label_lengths):
    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(old_input, current_input):
        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
            max_num_labels_tns, current_input
        )

    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
    dense_mask = tf.compat.v1.scan(
        range_less_than, label_lengths, initializer=init, parallel_iterations=1
    )
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(
        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
    )
    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
            tf.reverse(label_shape, [0]),
        )
    )
    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(
        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
    )
    vals_sparse = tf.compat.v1.gather_nd(labels, indices)
    return tf.SparseTensor(
        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
    )
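
# Illustrative example of the dense-to-sparse conversion above:
#   labels = [[1, 2, 0], [3, 0, 0]] with label_lengths = [2, 1]
#   -> SparseTensor(indices=[[0, 0], [0, 1], [1, 0]], values=[1, 2, 3]),
#   i.e. only the first `label_length` entries of each row survive.
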
class CTCLayer(layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it to the layer
        # using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred
# `CTCLayer` must be registered as a custom object for deserialization.
loaded_model = keras.models.load_model("ocr_model_pred.h5", custom_objects={"CTCLayer": CTCLayer})
loaded_model.load_weights("ocr_model_pred_weights.h5")

max_len = 5  # maximum captcha length
# Training vocabulary (note: it contains neither '0' nor a lowercase 'o').
characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# Mapping characters to integers.
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to the original characters.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
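
# Illustrative round trip (ids depend on vocabulary order; id 0 is the
# OOV token "[UNK]" that StringLookup prepends):
#   char_to_num(tf.constant(["1", "A"]))                -> [1, 10]
#   num_to_char(char_to_num(tf.constant(["1", "A"])))   -> ["1", "A"]
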
def distortion_free_resize(image, img_size):
    w, h = img_size
    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)

    # Check the amount of padding needed.
    pad_height = h - tf.shape(image)[0]
    pad_width = w - tf.shape(image)[1]

    # Only necessary if you want the same amount of padding on both sides.
    if pad_height % 2 != 0:
        height = pad_height // 2
        pad_height_top = height + 1
        pad_height_bottom = height
    else:
        pad_height_top = pad_height_bottom = pad_height // 2

    if pad_width % 2 != 0:
        width = pad_width // 2
        pad_width_left = width + 1
        pad_width_right = width
    else:
        pad_width_left = pad_width_right = pad_width // 2

    image = tf.pad(
        image,
        paddings=[
            [pad_height_top, pad_height_bottom],
            [pad_width_left, pad_width_right],
            [0, 0],
        ],
    )

    image = tf.transpose(image, perm=[1, 0, 2])
    image = tf.image.flip_left_right(image)
    return image
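
# Illustrative example: a 300x100 (w x h) input is resized to 96x32 with the
# aspect ratio preserved, so pad_width = 128 - 96 = 32 and 16 columns of
# padding land on each side; the transpose and flip then produce a
# (128, 32, 1) tensor, presumably matching the width-as-time layout used at
# training time.
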
def decode_batch_predictions(input_image):
    img_size = (128, 32)

    # Gradio hands us a PIL image; re-encode it to bytes so it can go through
    # the same decode path as the training images.
    img_byte_array = io.BytesIO()
    input_image.save(img_byte_array, format="JPEG")  # PIL calls the format "JPEG", not "JPG"
    input_image = img_byte_array.getvalue()

    input_image = tf.io.decode_image(input_image, channels=1, dtype=tf.dtypes.uint8)
    input_image = distortion_free_resize(input_image, img_size)
    # `tf.image.resize` already yields float32 values in [0, 255], so a cast
    # plus division by 255 is all the normalization needed.
    input_image = tf.cast(input_image, tf.float32) / 255.0
    input_image = tf.expand_dims(input_image, axis=0)  # add the batch dimension

    pred = loaded_model.predict(input_image)
    input_len = np.ones(pred.shape[0]) * pred.shape[1]

    # Use greedy search. For complex tasks, you can use beam search.
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_len
    ]

    # Iterate over the results and get back the text.
    output_text = []
    for res in results:
        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    # A single image yields a single prediction; return it as plain text.
    return output_text[0]
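
# Decoding notes for the function above (shapes assume one input image):
# `pred` is (1, T, C), where T is the model's timestep count and C is likely
# len(char_to_num.get_vocabulary()) + 1 to account for the CTC blank class;
# `ctc_decode(...)[0][0]` is a dense (1, max_decoded_len) tensor padded with
# -1, which is why the -1 entries are filtered out before mapping ids back
# to characters.
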
interface = gr.Interface(
    fn=decode_batch_predictions,
    inputs=gr.Image(label="Input image", type="pil"),
    outputs="text",
    title="Captcha Recognition",
    theme="darkhuggingface",
)
interface.launch(inline=False)