|
import gradio as gr |
|
import torch |
|
import pytesseract |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
import cv2 |
|
|
|
|
|
# Hugging Face checkpoint backing both the "eng" and "hin" handles.
_GOT_OCR_CHECKPOINT = "ucaslcl/GOT-OCR2_0"

# Both language handles refer to the same checkpoint, so load the tokenizer and
# the model once and alias them instead of holding two identical copies of the
# weights in memory. trust_remote_code=True lets transformers execute the
# modeling code shipped in the checkpoint repository.
tokenizer_eng = AutoTokenizer.from_pretrained(
    _GOT_OCR_CHECKPOINT, trust_remote_code=True
)
tokenizer_hin = tokenizer_eng

model_eng = AutoModelForCausalLM.from_pretrained(
    _GOT_OCR_CHECKPOINT, trust_remote_code=True
)
model_hin = model_eng
|
|
|
def perform_ocr(image):
    """Run Tesseract OCR on an image, once for English and once for Hindi.

    Args:
        image: The image as a NumPy array in RGB channel order (what the
            Gradio image input with ``type="numpy"`` supplies), or ``None``
            when no image was provided.

    Returns:
        A ``(english_text, hindi_text)`` tuple of strings. Both are empty
        when ``image`` is ``None``.
    """
    # Submitting the form without an upload delivers None; report "no text"
    # instead of failing inside the OCR call.
    if image is None:
        return "", ""

    # pytesseract assumes RGB input, so the array is passed through as-is.
    # (Converting to BGR first would hand Tesseract swapped colour channels,
    # and no temporary file on disk is needed for array input.)
    english_text = pytesseract.image_to_string(image)

    # '--psm 6' asks Tesseract to treat the image as a single uniform block
    # of text; lang='hin' requires the Hindi language data to be installed.
    tesseract_config = '--psm 6'
    hindi_text = pytesseract.image_to_string(
        image, lang='hin', config=tesseract_config
    )

    return english_text, hindi_text
|
|
|
def ocr_and_search(image):
    """Gradio callback: return the (English, Hindi) text found in ``image``.

    Delegates the actual recognition to ``perform_ocr`` and hands its two
    results back in the same order.
    """
    text_en, text_hi = perform_ocr(image)
    return (text_en, text_hi)
|
|
|
|
|
# Web UI: one image upload in, two text outputs (English first, then Hindi).
iface = gr.Interface(
    fn=ocr_and_search,
    # gr.Image replaces the legacy gr.inputs.Image namespace, which was
    # deprecated in Gradio 3 and removed in Gradio 4.
    inputs=gr.Image(type="numpy"),
    outputs=["text", "text"],
    title="OCR for English and Hindi",
    description="Upload an image to extract text in English and Hindi.",
)

# Start the Gradio server for the interface defined above.
iface.launch()