File size: 3,203 Bytes
4322a12
 
945b7f0
4322a12
 
945b7f0
4322a12
945b7f0
4322a12
 
 
945b7f0
 
4322a12
945b7f0
 
4322a12
945b7f0
4322a12
945b7f0
4322a12
945b7f0
4322a12
 
 
945b7f0
4322a12
945b7f0
 
 
 
4322a12
 
 
 
 
945b7f0
 
 
4322a12
 
 
 
 
945b7f0
 
4322a12
945b7f0
4322a12
945b7f0
4322a12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
945b7f0
4322a12
 
 
 
 
 
 
 
 
 
 
 
945b7f0
4322a12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- coding: utf-8 -*-
"""
Final WebApp using Gradio
"""

# Required Libraries
import cv2
import torch
from pytesseract import pytesseract
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import tempfile
import os

# Check if GPU is available
# Select CUDA when torch can see a GPU, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load models for OCR
# GOT-OCR2 provides the English OCR path; trust_remote_code is needed because
# the model ships custom code alongside its weights (downloaded on first run).
tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).to(device).eval()

# Tesseract configuration for Hindi OCR
# NOTE(review): hard-coded Linux binary path — confirm for the deployment target.
pytesseract.tesseract_cmd = '/usr/bin/tesseract'
# --oem 3: default OCR engine; --psm 6: assume a uniform block of text; -l hin: Hindi.
tesseract_config = '--oem 3 --psm 6 -l hin'

# OCR function for both English and Hindi
def perform_ocr(img, language):
    """Run OCR on an uploaded image in English (GOT-OCR2), Hindi (Tesseract), or both.

    Parameters
    ----------
    img : PIL.Image.Image
        The uploaded image (must support ``.save(path)``).
    language : str
        One of "English", "Hindi", or "Both".

    Returns
    -------
    tuple[str, str]
        ``(english_text, hindi_text)``; an entry is "" when that language
        was not requested.
    """
    # Both OCR backends read from a file path, so persist the image to disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_img:
        img.save(temp_img.name)
        img_path = temp_img.name

    res_eng = ""
    res_hin = ""

    try:
        if language in ["English", "Both"]:
            # Inference only — skip autograd bookkeeping.
            with torch.no_grad():
                res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')

        if language in ["Hindi", "Both"]:
            img_cv = cv2.imread(img_path)
            res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
    finally:
        # BUGFIX: delete the temp file even when OCR raises; previously the
        # cleanup was unreachable on any exception, leaking one file per failure.
        os.remove(img_path)

    return res_eng, res_hin

# Keyword Search Functionality
def ocr_and_search(image, language, keyword):
    """Run OCR on *image* and report whether *keyword* appears in the result.

    Returns a 3-tuple of display strings: the extracted English text, the
    extracted Hindi text, and the keyword-search summary.
    """
    eng_text, hin_text = perform_ocr(image, language)

    english_block = (
        f"Extracted English Text:\n{eng_text}"
        if eng_text
        else "No English text extracted."
    )
    hindi_block = (
        f"Extracted Hindi Text:\n{hin_text}"
        if hin_text
        else "No Hindi text extracted."
    )

    # Case-insensitive keyword lookup in each requested language's output.
    hits = []
    if keyword:
        needle = keyword.lower()
        if language in ["English", "Both"] and needle in eng_text.lower():
            hits.append(f"Keyword '{keyword}' found in English text.")
        if language in ["Hindi", "Both"] and needle in hin_text.lower():
            hits.append(f"Keyword '{keyword}' found in Hindi text.")

    summary = "\n".join(hits) if hits else "No matches found."
    return english_block, hindi_block, summary

# Gradio Interface Setup
with gr.Blocks() as app:
    gr.Markdown("### OCR Application")

    # Input widgets.
    img_widget = gr.Image(type="pil", label="Upload Image")
    lang_widget = gr.Radio(choices=["English", "Hindi", "Both"], label="Select Language")
    kw_widget = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")

    # Read-only result panes.
    eng_box = gr.Textbox(label="Extracted English Text", interactive=False)
    hin_box = gr.Textbox(label="Extracted Hindi Text", interactive=False)
    hits_box = gr.Textbox(label="Search Results", interactive=False)

    run_btn = gr.Button("Submit")
    run_btn.click(
        fn=ocr_and_search,
        inputs=[img_widget, lang_widget, kw_widget],
        outputs=[eng_box, hin_box, hits_box],
    )

# Launch the Gradio app
app.launch()