Spaces:

yashbyname
/

OCR_using_GOT_and_Tesseract

Running

File size: 1,849 Bytes

4322a12
3be6a72
 
 
add4d26
3be6a72
 
add4d26
 
 
 
3be6a72
add4d26
 
3be6a72
 
 
 
 
 
add4d26
 
 
 
 
 
 
 
3be6a72
 
add4d26
4322a12
add4d26
4322a12
add4d26
 
 
3be6a72
4322a12
add4d26
 
 
 
 
 
 
 
 
 
3be6a72

import gradio as gr
import torch
import pytesseract
from transformers import AutoTokenizer, AutoModelForCausalLM
import cv2  # Ensure you have OpenCV installed

# Load the GOT-OCR2.0 tokenizer and model.
# Both the "eng" and "hin" names point at the same checkpoint, so it is loaded
# once and shared instead of holding two identical copies of the weights in
# memory. The separate names are kept so existing references keep working.
# NOTE(review): trust_remote_code=True executes Python code shipped in the
# model repository; keep it only while that repository is trusted.
GOT_MODEL_ID = "ucaslcl/GOT-OCR2_0"
tokenizer_eng = AutoTokenizer.from_pretrained(GOT_MODEL_ID, trust_remote_code=True)
tokenizer_hin = tokenizer_eng
model_eng = AutoModelForCausalLM.from_pretrained(GOT_MODEL_ID, trust_remote_code=True)
model_hin = model_eng

def perform_ocr(image):
    """Run Tesseract OCR on an image for both English and Hindi.

    Args:
        image: Image as a numpy array, as delivered by the Gradio image
            input (presumably RGB channel order -- confirm if another caller
            is added), or ``None`` when no image was submitted.

    Returns:
        A ``(english_text, hindi_text)`` tuple of strings. Both are empty
        strings when ``image`` is ``None``.
    """
    # Nothing was uploaded: return empty results instead of failing inside
    # the OCR call.
    if image is None:
        return "", ""

    # Keep the module-level models on the best available device. They are
    # not used for inference in this function; OCR below is done by Tesseract.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_eng.to(device)
    model_hin.to(device)

    # pytesseract assumes RGB channel order, so the array is passed through
    # unchanged rather than being converted to OpenCV's BGR layout first.
    # No temporary file is written: a fixed on-disk path would be shared by
    # concurrent requests and was never read back.
    english_text = pytesseract.image_to_string(image)

    # Hindi pass: "--psm 6" tells Tesseract to assume a single uniform block
    # of text.
    tesseract_config = "--psm 6"
    hindi_text = pytesseract.image_to_string(
        image, lang="hin", config=tesseract_config
    )

    return english_text, hindi_text

def ocr_and_search(image):
    """Gradio callback: return the English and Hindi OCR text for an image.

    Delegates to ``perform_ocr`` and hands back its two results as an
    ``(english, hindi)`` tuple.
    """
    eng_result, hin_result = perform_ocr(image)
    extracted = (eng_result, hin_result)
    return extracted

# Gradio interface: one image input, two text outputs (English, Hindi).
# `gr.Image` is used instead of the deprecated `gr.inputs.Image` namespace,
# which was removed in Gradio 4; `gr.Image` is available in 3.x as well.
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=gr.Image(type="numpy"),  # Deliver the upload as a numpy array
    outputs=["text", "text"],
    title="OCR for English and Hindi",
    description="Upload an image to extract text in English and Hindi."
)

# Launch the interface
iface.launch()