yashbyname's picture
Update app.py
a1883c3 verified
raw
history blame
1.27 kB
import gradio as gr
import pytesseract
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
# Load models and tokenizers
#
# The GOT-OCR 2.0 checkpoint is loaded exactly once. The original code loaded
# the same checkpoint twice (once per language), which doubles memory use for
# no benefit: Hindi text is recognised with Tesseract, not with this model.
#
# Per the checkpoint's model card, it ships its own modelling/tokenizer code,
# so it must be loaded through ``AutoModel`` with ``trust_remote_code=True``.
# NOTE: ``trust_remote_code=True`` executes Python code downloaded from the
# Hub; pin a revision if the deployment needs to be reproducible/audited.
_GOT_OCR_CHECKPOINT = "ucaslcl/GOT-OCR2_0"

tokenizer_eng = AutoTokenizer.from_pretrained(
    _GOT_OCR_CHECKPOINT, trust_remote_code=True
)
model_eng = AutoModel.from_pretrained(
    _GOT_OCR_CHECKPOINT, trust_remote_code=True
)

# Kept as aliases so any code that still refers to the "*_hin" names keeps
# working without paying for a second copy of the weights.
tokenizer_hin = tokenizer_eng
model_hin = model_eng
def perform_ocr(image, language):
    """Run English and Hindi OCR on the uploaded image.

    Args:
        image: Path to the image file on disk. The Gradio interface below is
            configured with ``gr.Image(type="filepath")`` so that this is the
            path of the uploaded file.
        language: Value of the language dropdown. It is accepted for
            interface compatibility but is not used: both recognisers are
            always run.  NOTE(review): decide whether this should select
            which recogniser runs, and give the dropdown real choices.

    Returns:
        A ``(english_text, hindi_text)`` tuple: the output of the GOT-OCR
        model's ``chat`` call and the output of Tesseract with the ``hin``
        language pack, respectively.

    Raises:
        ValueError: If no image was supplied.
    """
    if image is None:
        raise ValueError("No image was provided for OCR.")

    # Set device to CPU or GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Only the model that is actually used for inference is moved; the
    # "*_hin" model is never called, so it is left where it is.
    model_eng.to(device)

    # English OCR: the model's chat helper is given the image file path.
    res_eng = model_eng.chat(tokenizer_eng, image, ocr_type='ocr')

    # Use pytesseract for Hindi OCR. image_to_string accepts a file path, so
    # the image does not need to be decoded here first (the original relied
    # on cv2, which was never imported).
    tesseract_config = '--psm 6'
    res_hin = pytesseract.image_to_string(
        image, lang='hin', config=tesseract_config
    )
    return res_eng, res_hin


def ocr_and_search(image, language):
    """Gradio callback: return ``(english_text, hindi_text)`` for *image*."""
    english_text, hindi_text = perform_ocr(image, language)
    return english_text, hindi_text


# Gradio
# The image component hands the callback a file path (not a pixel array),
# which is the form both OCR back-ends above are called with.
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[gr.Image(type="filepath"), "dropdown"],
    outputs=["text", "text"],
)
iface.launch()