Spaces:

wangxinhe
/

luogu-captcha-recognition

Running

Downgrade Gradio to v3 in order to use API endpoint

cfc019d verified over 1 year ago

1.38 kB

	import gradio as gr
	import tensorflow as tf
	from huggingface_hub import from_pretrained_keras
	from tensorflow import keras

	# Inverse lookup layer: turns the integer class indices produced by the
	# decoder back into characters.  After upper-casing and de-duplication the
	# sorted vocabulary is the digits 1-9 followed by the letters A-Z.
	num_to_char = keras.layers.StringLookup(
	    invert=True,
	    mask_token=None,
	    vocabulary=sorted(
	        {*"abcdefghijklmnpqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ".upper()}
	    ),
	)

	# Fetch the trained network from the Hugging Face Hub; it is only used for
	# inference here, so it is loaded without compiling.
	model = from_pretrained_keras(
	    "wangxinhe/luogu-captcha-recognition",
	    compile=False,
	)

	# Inference-only sub-model: it runs from the first input of the loaded
	# model up to the output of the layer named "dense2".
	prediction_model = keras.models.Model(
	    inputs=model.input[0],
	    outputs=model.get_layer("dense2").output,
	)
	prediction_model.summary()


	def ocr(img):
	    """Recognize the text shown in a CAPTCHA image.

	    Args:
	        img: Image data with three axes; the transpose below swaps the
	            first two, so the layout is presumably (height, width,
	            channels) as delivered by the Gradio image input.

	    Returns:
	        The decoded text as a ``str``, at most four characters long.

	    Raises:
	        ValueError: If ``img`` is ``None`` (no image was supplied).
	    """
	    if img is None:
	        # Without this guard the failure happens inside TensorFlow's tensor
	        # conversion, with a message that does not mention the missing image.
	        raise ValueError("No CAPTCHA image was provided.")

	    # The result is truncated to this many characters.
	    max_chars = 4

	    # Convert to float32 in [0, 1] range
	    img = tf.image.convert_image_dtype(img, tf.float32)
	    # Transpose the image because we want the time
	    # dimension to correspond to the width of the image.
	    img = tf.transpose(img, perm=[1, 0, 2])

	    # The model works on batches, so wrap the single image in a batch of one.
	    preds = prediction_model(tf.expand_dims(img, axis=0))

	    # Use greedy search. For complex tasks, you can use beam search.
	    # ctc_decode returns (decoded sequences, log probabilities); [0][0] picks
	    # the first decoded tensor, which is then cut to max_chars columns.
	    decoded = keras.backend.ctc_decode(
	        preds, input_length=[preds.shape[1]], greedy=True
	    )[0][0]
	    results = decoded[:, :max_chars]

	    # Map indices back to characters and join them into one Python string.
	    chars = num_to_char(results[0])
	    return tf.strings.reduce_join(chars).numpy().decode("ascii")


	# Upload widget for the CAPTCHA picture; `shape` and `source` are passed
	# through to the Gradio image component unchanged.
	captcha_input = gr.Image(
	    label="CAPTCHA image",
	    source="upload",
	    shape=(90, 35),
	)

	# Wire the recognizer to a simple image-in / text-out web UI and start it.
	iface = gr.Interface(
	    inputs=captcha_input,
	    outputs="textbox",
	    fn=ocr,
	)
	iface.launch()