Spaces:

yashbyname
/

OCR_using_GOT_and_Tesseract

Running

App Files Files Community

OCR_using_GOT_and_Tesseract / app.py

yashbyname

Update app.py

add4d26 verified 12 months ago

raw

history blame

1.85 kB

	import gradio as gr
	import torch
	import pytesseract
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import cv2 # Ensure you have OpenCV installed

	# Load the GOT-OCR2.0 tokenizer and model once at import time.
	# The English and Hindi slots use the same checkpoint, so the weights are
	# loaded a single time and shared instead of being held in memory twice.
	# NOTE(review): trust_remote_code=True runs Python code shipped with the model
	# repository; consider pinning a revision of the checkpoint.
	_GOT_OCR_CHECKPOINT = "ucaslcl/GOT-OCR2_0"
	tokenizer_eng = AutoTokenizer.from_pretrained(_GOT_OCR_CHECKPOINT, trust_remote_code=True)
	model_eng = AutoModelForCausalLM.from_pretrained(_GOT_OCR_CHECKPOINT, trust_remote_code=True)
	# Keep the *_hin names so existing references continue to resolve.
	tokenizer_hin = tokenizer_eng
	model_hin = model_eng

	def perform_ocr(image):
	    """Run Tesseract OCR on an image, once for English and once for Hindi.

	    Args:
	        image: Image as a NumPy array, assumed to be in RGB channel order
	            (the interface in this file requests ``type="numpy"`` images),
	            or ``None`` when no image was provided.

	    Returns:
	        A ``(english_text, hindi_text)`` tuple of strings. Both strings are
	        empty when ``image`` is ``None``.
	    """
	    # Nothing to recognise: return empty results instead of failing on None.
	    if image is None:
	        return "", ""

	    # Move the module-level models to the GPU when one is available.
	    # The text extraction below goes through Tesseract only.
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    model_eng.to(device)
	    model_hin.to(device)

	    # pytesseract interprets NumPy input as RGB, so the array is passed
	    # through unchanged; converting it to BGR first would swap the red and
	    # blue channels. No temporary file is written because nothing reads it.
	    english_text = pytesseract.image_to_string(image)

	    # Hindi pass: requires the 'hin' traineddata to be installed;
	    # '--psm 6' tells Tesseract to assume a single uniform block of text.
	    tesseract_config = '--psm 6'
	    hindi_text = pytesseract.image_to_string(image, lang='hin', config=tesseract_config)

	    return english_text, hindi_text

	def ocr_and_search(image):
	    """Return the English and Hindi OCR text for ``image`` as a 2-tuple.

	    Delegates to ``perform_ocr`` and hands its two results back in the
	    same order: English first, Hindi second.
	    """
	    extracted = perform_ocr(image)
	    eng, hin = extracted
	    return (eng, hin)

	# Gradio interface: one image input, two text outputs (English, Hindi).
	iface = gr.Interface(
	    fn=ocr_and_search,
	    # gr.Image is the current component API; the legacy gr.inputs namespace
	    # is deprecated in Gradio 3.x and removed in 4.x. type="numpy" delivers
	    # the upload to the handler as a NumPy array.
	    inputs=gr.Image(type="numpy"),
	    outputs=["text", "text"],
	    title="OCR for English and Hindi",
	    description="Upload an image to extract text in English and Hindi.",
	)

	# Launch the interface
	iface.launch()