import os
import io

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
import numpy as np
from PIL import Image
import gradio as gr
import tensorflow as tf
from keras import layers
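
# Custom CTC helpers: a reimplementation of `keras.backend.ctc_batch_cost`
# on top of `tf.compat.v1.nn.ctc_loss`, needed so that the `CTCLayer`
# defined below can be deserialized from the saved model.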
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)

    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())

    return tf.expand_dims(
        tf.compat.v1.nn.ctc_loss(
            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
        ),
        1,
    )
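
# Shape note on `ctc_batch_cost` above: `tf.compat.v1.nn.ctc_loss` consumes
# time-major inputs, hence the transpose of `y_pred` from
# (batch, time, classes) to (time, batch, classes). Taking the log turns the
# model's softmax probabilities into logits, which the op re-normalizes
# internally.
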
def ctc_label_dense_to_sparse(labels, label_lengths):
    label_shape = tf.shape(labels)
    num_batches_tns = tf.stack([label_shape[0]])
    max_num_labels_tns = tf.stack([label_shape[1]])

    def range_less_than(old_input, current_input):
        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
            max_num_labels_tns, current_input
        )

    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
    dense_mask = tf.compat.v1.scan(
        range_less_than, label_lengths, initializer=init, parallel_iterations=1
    )
    dense_mask = dense_mask[:, 0, :]

    label_array = tf.reshape(
        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
    )
    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)

    batch_array = tf.transpose(
        tf.reshape(
            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
            tf.reverse(label_shape, [0]),
        )
    )
    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)

    indices = tf.transpose(
        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
    )
    vals_sparse = tf.compat.v1.gather_nd(labels, indices)
    return tf.SparseTensor(
        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
    )
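
# Illustrative example of the dense-to-sparse conversion above:
#   labels = [[1, 2, 0], [3, 0, 0]] with label_lengths = [2, 1]
#   -> SparseTensor(indices=[[0, 0], [0, 1], [1, 0]], values=[1, 2, 3]),
#   i.e. only the first `label_length` entries of each row survive.
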
class CTCLayer(layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it to the layer
        # using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions.
        return y_pred
# `CTCLayer` must be registered as a custom object for deserialization.
loaded_model = keras.models.load_model("ocr_model_pred.h5", custom_objects={"CTCLayer": CTCLayer})
loaded_model.load_weights("ocr_model_pred_weights.h5")

max_len = 5  # maximum captcha length
# Training vocabulary (note: it contains neither '0' nor a lowercase 'o').
characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
# Mapping characters to integers.
char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)

# Mapping integers back to the original characters.
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)
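
# Illustrative round trip (ids depend on vocabulary order; id 0 is the
# OOV token "[UNK]" that StringLookup prepends):
#   char_to_num(tf.constant(["1", "A"]))                -> [1, 10]
#   num_to_char(char_to_num(tf.constant(["1", "A"])))   -> ["1", "A"]
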
def distortion_free_resize(image, img_size):
    w, h = img_size
    image = tf.image.resize(image, size=(h, w), preserve_aspect_ratio=True)

    # Check the amount of padding needed.
    pad_height = h - tf.shape(image)[0]
    pad_width = w - tf.shape(image)[1]

    # Only necessary if you want the same amount of padding on both sides.
    if pad_height % 2 != 0:
        height = pad_height // 2
        pad_height_top = height + 1
        pad_height_bottom = height
    else:
        pad_height_top = pad_height_bottom = pad_height // 2

    if pad_width % 2 != 0:
        width = pad_width // 2
        pad_width_left = width + 1
        pad_width_right = width
    else:
        pad_width_left = pad_width_right = pad_width // 2

    image = tf.pad(
        image,
        paddings=[
            [pad_height_top, pad_height_bottom],
            [pad_width_left, pad_width_right],
            [0, 0],
        ],
    )

    image = tf.transpose(image, perm=[1, 0, 2])
    image = tf.image.flip_left_right(image)
    return image
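
# Illustrative example: a 300x100 (w x h) input is resized to 96x32 with the
# aspect ratio preserved, so pad_width = 128 - 96 = 32 and 16 columns of
# padding land on each side; the transpose and flip then produce a
# (128, 32, 1) tensor, presumably matching the width-as-time layout used at
# training time.
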
def decode_batch_predictions(input_image):
    img_size = (128, 32)

    # Gradio hands us a PIL image; re-encode it to bytes so it can go through
    # the same decode path as the training images.
    img_byte_array = io.BytesIO()
    input_image.save(img_byte_array, format="JPEG")  # PIL calls the format "JPEG", not "JPG"
    input_image = img_byte_array.getvalue()

    input_image = tf.io.decode_image(input_image, channels=1, dtype=tf.dtypes.uint8)
    input_image = distortion_free_resize(input_image, img_size)
    # `tf.image.resize` already yields float32 values in [0, 255], so a cast
    # plus division by 255 is all the normalization needed.
    input_image = tf.cast(input_image, tf.float32) / 255.0
    input_image = tf.expand_dims(input_image, axis=0)  # add the batch dimension

    pred = loaded_model.predict(input_image)
    input_len = np.ones(pred.shape[0]) * pred.shape[1]

    # Use greedy search. For complex tasks, you can use beam search.
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_len
    ]

    # Iterate over the results and get back the text.
    output_text = []
    for res in results:
        res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    # A single image yields a single prediction; return it as plain text.
    return output_text[0]
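
# Decoding notes for the function above (shapes assume one input image):
# `pred` is (1, T, C), where T is the model's timestep count and C is likely
# len(char_to_num.get_vocabulary()) + 1 to account for the CTC blank class;
# `ctc_decode(...)[0][0]` is a dense (1, max_decoded_len) tensor padded with
# -1, which is why the -1 entries are filtered out before mapping ids back
# to characters.
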
interface = gr.Interface(
    fn=decode_batch_predictions,
    inputs=gr.Image(label="Input image", type="pil"),
    outputs="text",
    title="Captcha Recognition",
    theme="darkhuggingface",
)
interface.launch(inline=False)