Spaces:

omvishesh
/

OCR-app

Paused

App Files Files Community

omvishesh committed on Sep 30, 2024

Commit

51d22a6

verified ·

1 Parent(s): 89decca

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -15

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 import os
-import re  # Import regular expressions module
 # Load the OCR model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
@@ -12,47 +12,47 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0',
                                   use_safetensors=True,
                                   pad_token_id=tokenizer.eos_token_id).eval().cuda()
-# Define the function to process images and extract text
 def extract_text_from_image(image):
-    # Save the uploaded image temporarily
     image_path = "temp_image.jpg"
     image.save(image_path)
-    # Call the model to perform OCR
     extracted_text = model.chat(tokenizer, image_path, ocr_type='ocr')
-    # Remove the temporary image file
     os.remove(image_path)
     return extracted_text
-# Function to search for the keyword in extracted text and highlight it
 def search_and_highlight_keyword(extracted_text, keyword):
     if not keyword:
         return "<p>Please provide a keyword for searching.</p>"
-    # Case-insensitive search and replace keyword with <mark> tag for highlighting
     def highlight(match):
         # Custom background color and text color for highlighting
         return f"<mark style='background-color: #ffcc00; color: black;'>{match.group(0)}</mark>"
-    # Use regular expression to find the keyword in a case-insensitive manner
     pattern = re.compile(re.escape(keyword), re.IGNORECASE)
     highlighted_text = []
-    for line in extracted_text.splitlines():  # Split text into lines
-        if re.search(pattern, line):  # If keyword is found in the line
-            highlighted_line = re.sub(pattern, highlight, line)  # Highlight keyword
             highlighted_text.append(highlighted_line)
     if highlighted_text:
-        return '<br>'.join(highlighted_text)  # Join the lines with HTML <br> for line breaks
     else:
         return f"<p>Keyword '{keyword}' not found in the text.</p>"
 # Gradio interface components
 with gr.Blocks() as demo:
-    # Image upload and OCR
     gr.Markdown("# OCR and Keyword Search App with Highlighting")
     image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
@@ -64,7 +64,7 @@ with gr.Blocks() as demo:
                          inputs=image_input,
                          outputs=text_output)
-    # Keyword search and highlight
     keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
     search_result = gr.HTML(label="Highlighted Text with Keyword")
@@ -74,5 +74,5 @@ with gr.Blocks() as demo:
                         inputs=[text_output, keyword_input],
                         outputs=search_result)
-# Launch the Gradio app
 demo.launch(share=True)

 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 import os
+import re
 # Load the OCR model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
                                   use_safetensors=True,
                                   pad_token_id=tokenizer.eos_token_id).eval().cuda()
 def extract_text_from_image(image):
     """Run the OCR model on an uploaded image and return the recognised text.

     ``model.chat`` is called with a file path, so the image is first written
     to a temporary JPEG file, which is deleted again before returning.

     Args:
         image: Object exposing ``save(path)`` (the interface is configured
             to pass a PIL image), or ``None`` when nothing was uploaded.

     Returns:
         Whatever ``model.chat`` returns for the image, or an empty string
         when ``image`` is ``None``.
     """
     import tempfile  # local import: only this function needs it

     if image is None:
         # Nothing uploaded; avoid an AttributeError on ``image.save``.
         return ""
     # A unique path per call keeps concurrent requests from overwriting one
     # another's file, which a single fixed file name would allow.
     fd, image_path = tempfile.mkstemp(suffix=".jpg")
     os.close(fd)
     try:
         image.save(image_path)
         return model.chat(tokenizer, image_path, ocr_type='ocr')
     finally:
         # Remove the temporary file even when saving or OCR raises.
         os.remove(image_path)
 def search_and_highlight_keyword(extracted_text, keyword):
     """Return HTML listing every line of the text that contains the keyword.

     Matching is literal (the keyword is regex-escaped) and case-insensitive.
     Each occurrence is wrapped in a styled ``<mark>`` tag, and matching lines
     are joined with ``<br>``. All text taken from the inputs is HTML-escaped
     so that OCR output or a keyword containing ``<``, ``>`` or ``&`` is shown
     verbatim instead of being interpreted as markup.

     Args:
         extracted_text: Text to search; ``None`` is treated as empty.
         keyword: Literal string to look for.

     Returns:
         An HTML string: the highlighted matching lines, a prompt when the
         keyword is empty, or a "not found" message when no line matches.
     """
     import html  # local import: only this function needs it

     if not keyword:
         return "<p>Please provide a keyword for searching.</p>"
     pattern = re.compile(re.escape(keyword), re.IGNORECASE)

     def render(line):
         # Match against the raw line, then escape each piece separately;
         # escaping first would change what the keyword can match.
         pieces = []
         cursor = 0
         for match in pattern.finditer(line):
             pieces.append(html.escape(line[cursor:match.start()]))
             pieces.append(
                 "<mark style='background-color: #ffcc00; color: black;'>"
                 f"{html.escape(match.group(0))}</mark>"
             )
             cursor = match.end()
         pieces.append(html.escape(line[cursor:]))
         return "".join(pieces)

     highlighted_text = [
         render(line)
         for line in (extracted_text or "").splitlines()
         if pattern.search(line)
     ]
     if highlighted_text:
         return '<br>'.join(highlighted_text)
     return f"<p>Keyword '{html.escape(keyword)}' not found in the text.</p>"
 # Gradio interface components
 with gr.Blocks() as demo:
     gr.Markdown("# OCR and Keyword Search App with Highlighting")
     image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
                          inputs=image_input,
                          outputs=text_output)
     keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
     search_result = gr.HTML(label="Highlighted Text with Keyword")
                         inputs=[text_output, keyword_input],
                         outputs=search_result)
 demo.launch(share=True)