Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
os.environ["KERAS_BACKEND"] = "tensorflow"
|
3 |
+
import keras
|
4 |
+
import numpy as np
|
5 |
+
from PIL import Image
|
6 |
+
import tensorflow as tf
|
7 |
+
from keras import layers
|
8 |
+
from pathlib import Path
|
9 |
+
from pathlib import Path
|
10 |
+
from collections import Counter
|
11 |
+
|
12 |
+
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Compute the CTC loss for every sample of a batch.

    Args:
        y_true: Dense label tensor; converted to a sparse tensor through
            `ctc_label_dense_to_sparse`.
        y_pred: Rank-3 prediction tensor whose first two axes are swapped
            before the loss is evaluated (presumably
            `(batch, time, classes)` probabilities -- confirm with caller).
        input_length: Tensor with a trailing axis of size 1 holding the
            prediction length of each sample.
        label_length: Tensor with a trailing axis of size 1 holding the
            label length of each sample.

    Returns:
        The result of `tf.compat.v1.nn.ctc_loss` with an extra axis
        inserted at position 1.
    """
    # Drop the trailing singleton axis so both length tensors are flat int32.
    label_lens = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
    pred_lens = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)

    sparse_targets = tf.cast(
        ctc_label_dense_to_sparse(y_true, label_lens), tf.int32
    )

    # Swap the first two axes, then take the log; epsilon keeps log() away
    # from zero.
    swapped = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.math.log(swapped + keras.backend.epsilon())

    per_sample_loss = tf.compat.v1.nn.ctc_loss(
        inputs=log_probs, labels=sparse_targets, sequence_length=pred_lens
    )
    return tf.expand_dims(per_sample_loss, 1)
|
25 |
+
|
26 |
+
|
27 |
+
def ctc_label_dense_to_sparse(labels, label_lengths):
    """Turn a dense 2-D label tensor into a `tf.SparseTensor`.

    For each row `i`, only the columns whose index is below
    `label_lengths[i]` are kept; everything past that length is dropped.

    Args:
        labels: 2-D tensor of labels (its shape is read as two entries).
        label_lengths: Sequence of per-row lengths, consumed one element at
            a time by `tf.compat.v1.scan`.

    Returns:
        A `tf.SparseTensor` with int64 `(row, column)` indices of the kept
        entries, the matching values gathered from `labels`, and the shape
        of `labels` as its dense shape.
    """
    dense_shape = tf.shape(labels)
    batch_size = dense_shape[0]
    max_labels = dense_shape[1]
    batch_size_vec = tf.stack([batch_size])
    max_labels_vec = tf.stack([max_labels])

    def columns_below(previous_row, length):
        # Only the width of the carried value is used; the row is rebuilt
        # from scratch for every length.
        column_ids = tf.expand_dims(tf.range(tf.shape(previous_row)[1]), 0)
        return column_ids < tf.fill(max_labels_vec, length)

    # Boolean mask: True where a label position lies inside the row's length.
    seed_row = tf.cast(tf.fill([1, max_labels], 0), tf.bool)
    keep_mask = tf.compat.v1.scan(
        columns_below,
        label_lengths,
        initializer=seed_row,
        parallel_iterations=1,
    )
    keep_mask = keep_mask[:, 0, :]

    # Column index of every kept entry.
    column_grid = tf.reshape(
        tf.tile(tf.range(0, max_labels), batch_size_vec), dense_shape
    )
    kept_columns = tf.compat.v1.boolean_mask(column_grid, keep_mask)

    # Row (batch) index of every kept entry.
    row_grid = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(0, batch_size), max_labels_vec),
            tf.reverse(dense_shape, [0]),
        )
    )
    kept_rows = tf.compat.v1.boolean_mask(row_grid, keep_mask)

    # Pair the row and column indices into an (N, 2) index matrix.
    stacked = tf.concat([kept_rows, kept_columns], axis=0)
    indices = tf.transpose(tf.reshape(stacked, [2, -1]))

    values = tf.compat.v1.gather_nd(labels, indices)

    return tf.SparseTensor(
        tf.cast(indices, tf.int64), values, tf.cast(dense_shape, tf.int64)
    )
|
64 |
+
|
65 |
+
|
66 |
+
class CTCLayer(layers.Layer):
    """Pass-through layer that registers the CTC loss on the model.

    `call` returns `y_pred` unchanged; its only side effect is adding the
    loss computed by `ctc_batch_cost` through `self.add_loss()`.
    """

    def __init__(self, name=None, **kwargs):
        """Create the layer.

        Args:
            name: Optional layer name.
            **kwargs: Any other base `Layer` constructor arguments (for
                example `trainable` or `dtype`). They are forwarded so the
                layer can be rebuilt from a saved config, which carries
                those keys; accepting only `name` makes that rebuild fail
                with an unexpected-keyword error.
        """
        super().__init__(name=name, **kwargs)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        """Add the CTC loss for this batch and return `y_pred` untouched.

        Args:
            y_true: Label tensor; axis 0 is read as the batch size and
                axis 1 as the label length used for every sample.
            y_pred: Prediction tensor; axis 1 is read as the input length
                used for every sample.

        Returns:
            `y_pred`, unchanged.
        """
        # Compute the training-time loss value and add it to the layer using
        # `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # Every sample gets the same (full) length, shaped (batch, 1).
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred
|
85 |
+
|
86 |
+
# Restore the saved OCR model, then load the separately saved weights into it.
# NOTE(review): both paths point at a Kaggle working directory -- confirm the
# files exist at these locations wherever this app is deployed.
loaded_model = keras.models.load_model(
    "/kaggle/working/ocr_model_pred.h5",
    custom_objects={"CTCLayer": CTCLayer},
)
loaded_model.load_weights("/kaggle/working/ocr_model_pred_weights.h5")

# Decoded predictions are truncated to this many characters.
max_len = 5

# Vocabulary: digits 1-9, upper-case A-Z and lower-case a-z without 'o'.
characters = [
    '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
]

# Forward lookup: character -> integer id.
char_to_num = layers.StringLookup(
    vocabulary=list(characters),
    mask_token=None,
)

# Inverse lookup: integer id -> character, built from the same vocabulary.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None,
    invert=True,
)
|
98 |
+
|
99 |
+
def decode_batch_predictions(pred):
    """Decode a batch of model outputs into a list of strings.

    Args:
        pred: Array-like model output with a `.shape`; axis 0 is read as the
            batch size and axis 1 as the sequence length of every sample.

    Returns:
        A list with one decoded string per batch entry, each limited to the
        first `max_len` decoded positions.
    """
    batch_size, steps = pred.shape[0], pred.shape[1]
    input_len = np.ones(batch_size) * steps

    # Use greedy search. For complex tasks, you can use beam search.
    decoded = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0]
    best_paths = decoded[0][:, :max_len]

    def to_text(row):
        # -1 entries are filtered out before mapping ids back to characters.
        kept = tf.gather(row, tf.where(tf.math.not_equal(row, -1)))
        return tf.strings.reduce_join(num_to_char(kept)).numpy().decode("utf-8")

    # Iterate over the results and get back the text.
    return [to_text(row) for row in best_paths]
|
115 |
+
|
116 |
+
# `gr` is used below but gradio was never imported anywhere in this file;
# without this import the script stops with a NameError before the UI starts.
import gradio as gr


def recognize_captcha(image):
    """Run the loaded OCR model on one uploaded image and return its text.

    The Gradio input is declared with type="pil", so the callback receives a
    PIL image, while `decode_batch_predictions` reads `pred.shape` and needs
    the model's output. This wrapper bridges the two: image -> model input ->
    prediction -> decoded string.

    NOTE(review): the preprocessing (grayscale, scaled to [0, 1], resized to
    the model's input size, width as the first spatial axis) follows the
    Keras captcha-OCR example this file appears to be based on -- confirm it
    matches how the model was actually trained.

    Args:
        image: PIL image supplied by Gradio, or None when nothing was given.

    Returns:
        The decoded text for the image, or an empty string for a missing
        image.
    """
    if image is None:
        return ""

    # NOTE(review): assumes a single-input model whose input shape is
    # (batch, width, height, 1) -- TODO confirm against the saved model.
    target_width, target_height = loaded_model.input_shape[1:3]

    # PIL -> float32 grayscale array of shape (height, width) in [0, 1].
    gray = np.asarray(image.convert("L"), dtype="float32") / 255.0
    tensor = tf.convert_to_tensor(gray)[..., tf.newaxis]
    tensor = tf.image.resize(tensor, [target_height, target_width])
    # Swap to (width, height, 1) so the width becomes the sequence axis.
    tensor = tf.transpose(tensor, perm=[1, 0, 2])

    pred = loaded_model.predict(tf.expand_dims(tensor, axis=0))
    return decode_batch_predictions(pred)[0]


interface = gr.Interface(
    fn=recognize_captcha,
    inputs=gr.Image(label="Input image", type="pil"),
    outputs='text',
    title='Captcha Recognition',
    theme='darkhuggingface',
)
interface.launch(inline=False)
|