File size: 1,849 Bytes
4322a12 3be6a72 add4d26 3be6a72 add4d26 3be6a72 add4d26 3be6a72 add4d26 3be6a72 add4d26 4322a12 add4d26 4322a12 add4d26 3be6a72 4322a12 add4d26 3be6a72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
import torch
import pytesseract
from transformers import AutoTokenizer, AutoModelForCausalLM
import cv2 # Ensure you have OpenCV installed
# Load the model and tokenizer.
# The "English" and "Hindi" names refer to the very same checkpoint, so load it
# once and alias the second name instead of downloading and holding two
# identical copies in memory.
# NOTE(security): trust_remote_code=True executes Python code shipped in the
# model repository; only keep it for repositories you trust (ideally pin a
# revision).
_GOT_OCR_CHECKPOINT = "ucaslcl/GOT-OCR2_0"
tokenizer_eng = AutoTokenizer.from_pretrained(_GOT_OCR_CHECKPOINT, trust_remote_code=True)
tokenizer_hin = tokenizer_eng
model_eng = AutoModelForCausalLM.from_pretrained(_GOT_OCR_CHECKPOINT, trust_remote_code=True)
model_hin = model_eng
def perform_ocr(image):
    """Run Tesseract OCR on an image for both English and Hindi.

    Args:
        image: RGB image as a NumPy array (the Gradio input is configured
            with ``type="numpy"``), or ``None`` when no image was supplied.

    Returns:
        A ``(english_text, hindi_text)`` tuple of strings. Both are empty
        when ``image`` is ``None``.
    """
    # Nothing to recognise: return empty results instead of crashing inside
    # the image-processing calls.
    if image is None:
        return "", ""

    # Keep the loaded models on the best available device.
    # NOTE(review): the text below is produced by Tesseract only; the models
    # are not consulted in this function.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_eng.to(device)
    model_hin.to(device)

    # pytesseract treats NumPy arrays as RGB, which is what we already have,
    # so the array is passed through unchanged (no BGR conversion, and no
    # shared temporary file that concurrent requests could overwrite).
    english_text = pytesseract.image_to_string(image)

    # Hindi pass: page-segmentation mode 6 is requested via the config string.
    tesseract_config = "--psm 6"
    hindi_text = pytesseract.image_to_string(image, lang="hin", config=tesseract_config)

    return english_text, hindi_text
def ocr_and_search(image):
    """Gradio callback: return the English and Hindi text found in ``image``.

    Delegates to ``perform_ocr`` and hands back its two results as an
    ``(english, hindi)`` tuple.
    """
    extracted = perform_ocr(image)
    # Unpack explicitly so exactly two values are required from perform_ocr.
    eng, hin = extracted
    return (eng, hin)
# Gradio interface: one image input, two text outputs (English, Hindi).
iface = gr.Interface(
    fn=ocr_and_search,
    # gr.Image replaces the legacy gr.inputs.Image namespace, which is no
    # longer available in current Gradio releases. The callback receives the
    # upload as a NumPy array.
    inputs=gr.Image(type="numpy"),
    outputs=["text", "text"],
    title="OCR for English and Hindi",
    description="Upload an image to extract text in English and Hindi.",
)

# Launch the interface
iface.launch()