"""OCR and keyword-search demo: GOT-OCR2.0 for text extraction, Gradio for the UI.

Running this file as a script loads the model, builds the interface and
launches it with a public share link. Importing it only defines the helpers,
so the pure text-processing code can be used without a GPU.
"""

import functools
import html
import os
import re  # Regular expressions for the case-insensitive keyword search
import tempfile

# Hugging Face repository of the OCR model and its tokenizer.
MODEL_NAME = 'ucaslcl/GOT-OCR2_0'

# Inline CSS applied to every highlighted match.
# NOTE(review): the original markup was lost from this file; the source comment
# only says "custom background color and text color" -- confirm the exact colors.
HIGHLIGHT_STYLE = "background-color: yellow; color: black;"


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the OCR tokenizer and model once and return ``(tokenizer, model)``."""
    # Imported here so that importing this module does not require the
    # transformers stack (or a GPU) unless OCR is actually performed.
    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cuda',
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id,
    ).eval().cuda()
    return tokenizer, model


def extract_text_from_image(image):
    """Run OCR on an uploaded image and return the extracted text.

    Args:
        image: The PIL image delivered by the Gradio image component, or
            ``None`` when nothing has been uploaded.

    Returns:
        The text produced by the model, or an empty string when no image
        was provided.
    """
    if image is None:
        return ""

    tokenizer, model = _load_model()

    # A unique temporary file per call: a fixed file name would be shared
    # (and clobbered or deleted) by concurrent requests.
    fd, image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        # JPEG cannot store alpha or palette data, so normalise the mode first.
        if image.mode != "RGB":
            image = image.convert("RGB")
        image.save(image_path)
        # The model's chat API takes a path to the image file.
        return model.chat(tokenizer, image_path, ocr_type='ocr')
    finally:
        # Remove the temporary file even when saving or OCR fails.
        os.remove(image_path)


def _highlight_line(pattern, line):
    """Return *line* as HTML with every match of *pattern* wrapped in ``<mark>``."""
    pieces = []
    cursor = 0
    for match in pattern.finditer(line):
        # Escape the text between matches so OCR output is never parsed as HTML.
        pieces.append(html.escape(line[cursor:match.start()]))
        pieces.append(
            f'<mark style="{HIGHLIGHT_STYLE}">{html.escape(match.group(0))}</mark>'
        )
        cursor = match.end()
    pieces.append(html.escape(line[cursor:]))
    return "".join(pieces)


def search_and_highlight_keyword(extracted_text, keyword):
    """Return HTML listing the lines that contain *keyword*, with matches highlighted.

    The search is case-insensitive and treats the keyword as literal text.

    Args:
        extracted_text: The text to search (``None`` is treated as empty).
        keyword: The keyword to look for.

    Returns:
        An HTML string: the matching lines joined by ``<br>``, or a ``<p>``
        message when the keyword is missing or does not occur in the text.
    """
    if not keyword or not keyword.strip():
        return "<p>Please provide a keyword for searching.</p>"

    # re.escape makes regex metacharacters in the keyword match literally.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    highlighted_lines = [
        _highlight_line(pattern, line)
        for line in (extracted_text or "").splitlines()
        if pattern.search(line)
    ]

    if highlighted_lines:
        # Join the lines with HTML line breaks.
        return "<br>".join(highlighted_lines)
    # The keyword is user input, so escape it before echoing it back as HTML.
    return f"<p>Keyword '{html.escape(keyword)}' not found in the text.</p>"


def build_demo():
    """Build and return the Gradio Blocks interface."""
    import gradio as gr

    with gr.Blocks() as demo:
        # Image upload and OCR
        gr.Markdown("# OCR and Keyword Search App with Highlighting")
        image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
        text_output = gr.Textbox(
            label="Extracted Text",
            placeholder="Text will appear here after OCR.",
        )
        extract_button = gr.Button("Extract Text")
        extract_button.click(
            fn=extract_text_from_image,
            inputs=image_input,
            outputs=text_output,
        )

        # Keyword search and highlight
        keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
        search_result = gr.HTML(label="Highlighted Text with Keyword")
        search_button = gr.Button("Search and Highlight Keyword")
        search_button.click(
            fn=search_and_highlight_keyword,
            inputs=[text_output, keyword_input],
            outputs=search_result,
        )
    return demo


def main():
    """Load the model, then launch the Gradio app."""
    # Load eagerly so a missing GPU or failed download surfaces at startup
    # rather than on the first request.
    _load_model()
    build_demo().launch(share=True)


if __name__ == "__main__":
    main()