Create app.py
app.py
ADDED
@@ -0,0 +1,78 @@
+import gradio as gr
+from transformers import AutoModel, AutoTokenizer
+import os
+import re  # Import regular expressions module
+
+# Load the OCR model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0',
+                                  trust_remote_code=True,
+                                  low_cpu_mem_usage=True,
+                                  device_map='cuda',
+                                  use_safetensors=True,
+                                  pad_token_id=tokenizer.eos_token_id).eval().cuda()
+
+# Define the function to process images and extract text
+def extract_text_from_image(image):
+    # Save the uploaded image temporarily
+    image_path = "temp_image.jpg"
+    image.save(image_path)
+
+    # Call the model to perform OCR
+    extracted_text = model.chat(tokenizer, image_path, ocr_type='ocr')
+
+    # Remove the temporary image file
+    os.remove(image_path)
+
+    return extracted_text
+
+# Function to search for the keyword in extracted text and highlight it
+def search_and_highlight_keyword(extracted_text, keyword):
+    if not keyword:
+        return "<p>Please provide a keyword for searching.</p>"
+
+    # Case-insensitive search and replace keyword with <mark> tag for highlighting
+    def highlight(match):
+        # Custom background color and text color for highlighting
+        return f"<mark style='background-color: #ffcc00; color: black;'>{match.group(0)}</mark>"
+
+    # Use regular expression to find the keyword in a case-insensitive manner
+    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
+
+    highlighted_text = []
+    for line in extracted_text.splitlines():  # Split text into lines
+        if re.search(pattern, line):  # If keyword is found in the line
+            highlighted_line = re.sub(pattern, highlight, line)  # Highlight keyword
+            highlighted_text.append(highlighted_line)
+
+    if highlighted_text:
+        return '<br>'.join(highlighted_text)  # Join the lines with HTML <br> for line breaks
+    else:
+        return f"<p>Keyword '{keyword}' not found in the text.</p>"
+
+# Gradio interface components
+with gr.Blocks() as demo:
+    # Image upload and OCR
+    gr.Markdown("# OCR and Keyword Search App with Highlighting")
+
+    image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
+    text_output = gr.Textbox(label="Extracted Text", placeholder="Text will appear here after OCR.")
+
+    extract_button = gr.Button("Extract Text")
+
+    extract_button.click(fn=extract_text_from_image,
+                         inputs=image_input,
+                         outputs=text_output)
+
+    # Keyword search and highlight
+    keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
+    search_result = gr.HTML(label="Highlighted Text with Keyword")
+
+    search_button = gr.Button("Search and Highlight Keyword")
+
+    search_button.click(fn=search_and_highlight_keyword,
+                        inputs=[text_output, keyword_input],
+                        outputs=search_result)
+
+# Launch the Gradio app
+demo.launch(share=True)
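
The keyword-highlighting step above can be exercised on its own, without the GPU model or the Gradio UI. The following standalone sketch repeats the same regex-based approach on sample text; the helper name and sample string are illustrative and are not part of app.py.

import re

def highlight_keyword(text, keyword):
    # Same approach as search_and_highlight_keyword in app.py:
    # case-insensitive match, wrap each hit in a <mark> tag, keep only matching lines.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    hits = [pattern.sub(lambda m: f"<mark>{m.group(0)}</mark>", line)
            for line in text.splitlines()
            if pattern.search(line)]
    return '<br>'.join(hits) if hits else f"<p>Keyword '{keyword}' not found in the text.</p>"

sample = "Invoice No. 1234\nTotal amount: $56.78\nThank you for your purchase"
print(highlight_keyword(sample, "total"))
# Prints: <mark>Total</mark> amount: $56.78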