Spaces:

Puyush
/

Captcha_Recognition

Sleeping

App Files Files Community

Puyush committed on Jan 21, 2024

Commit

14e6eb4

verified ·

1 Parent(s): d032d26

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -0

app.py CHANGED Viewed

	@@ -0,0 +1,118 @@

+import os
+os.environ["KERAS_BACKEND"] = "tensorflow"
+import keras
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+from keras import layers
+from pathlib import Path
+from pathlib import Path
+from collections import Counter
+def ctc_batch_cost(y_true, y_pred, input_length, label_length):
+    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
+    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
+    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)
+    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
+    return tf.expand_dims(
+        tf.compat.v1.nn.ctc_loss(
+            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
+        ),
+        1,
+    )
+def ctc_label_dense_to_sparse(labels, label_lengths):
+    """Convert a dense label tensor into a `tf.SparseTensor`.
+
+    Only the leading `label_lengths[i]` entries of row `i` are kept; the
+    result's dense shape equals the shape of `labels`.
+
+    Args:
+        labels: dense label tensor; the code indexes its shape at positions
+            0 and 1, i.e. it is treated as 2-D.
+        label_lengths: one length per row of `labels` (iterated by the scan).
+
+    Returns:
+        A `tf.SparseTensor` with int64 indices, values gathered from
+        `labels`, and an int64 dense shape.
+    """
+    label_shape = tf.shape(labels)
+    # One-element tensors holding the row count and the column count.
+    num_batches_tns = tf.stack([label_shape[0]])
+    max_num_labels_tns = tf.stack([label_shape[1]])
+    def range_less_than(old_input, current_input):
+        # Boolean row: True at column positions strictly below `current_input`.
+        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
+            max_num_labels_tns, current_input
+        )
+    # All-False accumulator of shape [1, num_columns]; it fixes the scan's output shape.
+    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
+    # Build one mask row per entry of `label_lengths`.
+    dense_mask = tf.compat.v1.scan(
+        range_less_than, label_lengths, initializer=init, parallel_iterations=1
+    )
+    # Drop the size-1 middle axis introduced by `expand_dims` above.
+    dense_mask = dense_mask[:, 0, :]
+    # Column index of every position, laid out in the shape of `labels`.
+    label_array = tf.reshape(
+        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
+    )
+    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)
+    # Row index of every position: built in the reversed shape, then transposed.
+    batch_array = tf.transpose(
+        tf.reshape(
+            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
+            tf.reverse(label_shape, [0]),
+        )
+    )
+    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
+    # Pair the kept row and column indices into an [N, 2] index tensor.
+    indices = tf.transpose(
+        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
+    )
+    vals_sparse = tf.compat.v1.gather_nd(labels, indices)
+    return tf.SparseTensor(
+        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
+    )
+class CTCLayer(layers.Layer):
+    def __init__(self, name=None):
+        super().__init__(name=name)
+        self.loss_fn = ctc_batch_cost
+    def call(self, y_true, y_pred):
+        # Compute the training-time loss value and add it to the layer using `self.add_loss()`.
+        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
+        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
+        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
+        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
+        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
+        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
+        self.add_loss(loss)
+        # At test time, just return the computed predictions
+        return y_pred
+loaded_model = keras.models.load_model("/kaggle/working/ocr_model_pred.h5", custom_objects={"CTCLayer": CTCLayer})
+loaded_model.load_weights("/kaggle/working/ocr_model_pred_weights.h5")
+max_len = 5
+characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
+# Mapping characters to integers
+char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)
+# Mapping integers back to original characters
+num_to_char = layers.StringLookup(
+    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
+)
+def decode_batch_predictions(pred):
+    input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    # Use greedy search. For complex tasks, you can use beam search.
+    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
+        :, :max_len
+    ]
+    # Iterate over the results and get back the text.
+    output_text = []
+    for res in results:
+      res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
+      res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
+      output_text.append(res)
+    return output_text
+interface = gr.Interface(fn=decode_batch_predictions, inputs=gr.Image(label="Input image", type="pil"),
+                         outputs='text',title='Captcha Recognition', theme='darkhuggingface')
+interface.launch(inline=False)