File size: 3,203 Bytes
4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 945b7f0 4322a12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# -*- coding: utf-8 -*-
"""
Final WebApp using Gradio
"""
# Required Libraries
import cv2
import torch
from pytesseract import pytesseract
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import tempfile
import os
# Check if GPU is available
# Select CUDA when present; the GOT-OCR2 model runs on either device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load models for OCR
# NOTE(review): trust_remote_code executes model-repo code and the first call
# downloads weights from the Hugging Face hub — requires network access.
tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).to(device).eval()
# Tesseract configuration for Hindi OCR
# assumes the tesseract binary lives at /usr/bin/tesseract with the 'hin'
# language pack installed — TODO confirm on the deployment host
pytesseract.tesseract_cmd = '/usr/bin/tesseract'
tesseract_config = '--oem 3 --psm 6 -l hin'
# OCR function for both English and Hindi
# OCR function for both English and Hindi
def perform_ocr(img, language):
    """Run OCR on a PIL image and return a (english_text, hindi_text) tuple.

    English text is extracted with the GOT-OCR2 model; Hindi text with
    Tesseract. Either string is "" when its language was not requested.

    Args:
        img: PIL image (must support .save(path)).
        language: "English", "Hindi", or "Both".
    """
    # Persist the uploaded image to disk: both OCR backends take a file path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_img:
        img.save(temp_img.name)
        img_path = temp_img.name
    res_eng = ""
    res_hin = ""
    try:
        if language in ["English", "Both"]:
            # no_grad: inference only, skip autograd bookkeeping
            with torch.no_grad():
                res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')
        if language in ["Hindi", "Both"]:
            img_cv = cv2.imread(img_path)
            res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
    finally:
        # Always remove the temp file, even if an OCR backend raises —
        # the original leaked it on error.
        os.remove(img_path)
    return res_eng, res_hin
# Keyword Search Functionality
def ocr_and_search(image, language, keyword):
english_text, hindi_text = perform_ocr(image, language)
extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
extracted_hindi = f"Extracted Hindi Text:\n{hindi_text}" if hindi_text else "No Hindi text extracted."
# Search for the keyword in the extracted text
search_results = []
if keyword:
if language in ["English", "Both"] and keyword.lower() in english_text.lower():
search_results.append(f"Keyword '{keyword}' found in English text.")
if language in ["Hindi", "Both"] and keyword.lower() in hindi_text.lower():
search_results.append(f"Keyword '{keyword}' found in Hindi text.")
search_output = "\n".join(search_results) if search_results else "No matches found."
return extracted_english, extracted_hindi, search_output
# Gradio Interface Setup
# Wires the inputs (image, language, keyword) through ocr_and_search into the
# three read-only output boxes.
with gr.Blocks() as app:
    gr.Markdown("### OCR Application")
    image_input = gr.Image(type="pil", label="Upload Image")
    language_selection = gr.Radio(choices=["English", "Hindi", "Both"], label="Select Language")
    keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")
    output_english = gr.Textbox(label="Extracted English Text", interactive=False)
    output_hindi = gr.Textbox(label="Extracted Hindi Text", interactive=False)
    output_search = gr.Textbox(label="Search Results", interactive=False)
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=ocr_and_search,
        inputs=[image_input, language_selection, keyword_input],
        outputs=[output_english, output_hindi, output_search],
    )

# Launch the Gradio app
# (fixed: the original line ended with a stray '|' token, a syntax error)
app.launch()