File size: 1,270 Bytes
4322a12
3be6a72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4322a12
945b7f0
4322a12
3be6a72
98106bd
3be6a72
4322a12
a1883c3
3be6a72
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import tempfile

import cv2
import gradio as gr
import pytesseract
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

# Load the GOT-OCR 2.0 tokenizer and model once at import time.
#
# trust_remote_code=True is required: the `.chat()` method used for inference
# is defined by the checkpoint's own modelling code, not by `transformers`.
# Only enable it for checkpoints you trust, since it executes code downloaded
# from the Hub.
# NOTE(review): the checkpoint is loaded through AutoModel as in its model
# card; confirm against the pinned `transformers` version.
tokenizer_eng = AutoTokenizer.from_pretrained(
    "ucaslcl/GOT-OCR2_0", trust_remote_code=True
)
model_eng = AutoModel.from_pretrained(
    "ucaslcl/GOT-OCR2_0", trust_remote_code=True
)

# The "Hindi" names referred to the very same checkpoint; alias them instead
# of loading a second copy so memory use and start-up time are not doubled.
tokenizer_hin = tokenizer_eng
model_hin = model_eng

def _image_to_path(image):
    """Return ``(path, is_temporary)`` locating ``image`` on disk.

    A ``str`` / ``os.PathLike`` is used as-is. Anything else is written to a
    temporary PNG file: objects exposing ``save(path)`` (e.g. PIL images) save
    themselves, and every other object is treated as an array and encoded
    with OpenCV.
    """
    if isinstance(image, (str, os.PathLike)):
        return os.fspath(image), False

    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # only the name is needed; the encoders reopen the file
    try:
        if hasattr(image, "save"):
            image.save(tmp_path)
        else:
            # Assumes a channel-last RGB(A) array, as Gradio's numpy image
            # input delivers -- TODO confirm. OpenCV writes BGR(A).
            channels = image.shape[2] if getattr(image, "ndim", 0) == 3 else 1
            if channels == 3:
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            elif channels == 4:
                image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)
            if not cv2.imwrite(tmp_path, image):
                raise ValueError("Could not encode the supplied image for OCR.")
    except Exception:
        # Do not leave the temporary file behind when encoding fails.
        os.remove(tmp_path)
        raise
    return tmp_path, True


def perform_ocr(image, language):
    """Run OCR on ``image`` with the GOT-OCR model and with Tesseract (Hindi).

    Args:
        image: The image to read, given as a file path, an array, or an
            object with a ``save(path)`` method such as a PIL image.
        language: Accepted for interface compatibility; it is not consulted,
            both engines always run.

    Returns:
        A ``(res_eng, res_hin)`` tuple: the result of ``model_eng.chat`` in
        ``'ocr'`` mode and the string produced by Tesseract with ``lang='hin'``.

    Raises:
        ValueError: If ``image`` is ``None`` or cannot be written to disk.
    """
    if image is None:
        raise ValueError("No image was provided for OCR.")

    # Set device to CPU or GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Only the model that is actually used for inference is moved.
    model_eng.to(device)

    # The model's chat() call is given a file path, so make sure the image
    # exists on disk (previously a hard-coded placeholder path was used and
    # the uploaded image was ignored).
    img_path, is_temporary = _image_to_path(image)
    try:
        res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')

        # Use pytesseract for Hindi OCR; --psm 6 is passed through as the
        # Tesseract page-segmentation setting.
        tesseract_config = '--psm 6'
        res_hin = pytesseract.image_to_string(
            img_path, lang='hin', config=tesseract_config
        )
    finally:
        if is_temporary:
            os.remove(img_path)

    return res_eng, res_hin

def ocr_and_search(image, language):
    """Gradio callback: run OCR on ``image`` and return both transcriptions.

    Delegates to ``perform_ocr`` and returns its two results as an
    ``(english, hindi)`` tuple, one value per output component.
    """
    eng, hin = perform_ocr(image, language)
    return (eng, hin)

# Gradio UI: an image and a language selector in, both OCR transcriptions out.
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        # Deliver the upload as a path on disk, which is the form the OCR
        # model's chat() call is given.
        gr.Image(type="filepath", label="Image"),
        # The bare "dropdown" shortcut creates a dropdown without any choices,
        # so nothing could be selected; offer the two languages handled here.
        gr.Dropdown(choices=["English", "Hindi"], value="English", label="Language"),
    ],
    outputs=[
        gr.Textbox(label="English text"),
        gr.Textbox(label="Hindi text"),
    ],
)
iface.launch()