Spaces:
Paused
Paused
File size: 2,580 Bytes
559732c 51d22a6 559732c c86b97e 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 c86b97e 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c 51d22a6 559732c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import html
import os
import re
import tempfile

import gradio as gr
from transformers import AutoModel, AutoTokenizer
# Load the OCR tokenizer and model once at import time so every request
# reuses the same instances.
# NOTE(review): trust_remote_code=True lets the Hub repository supply its own
# model code; confirm the repository is trusted before deploying.
_MODEL_ID = 'ucaslcl/GOT-OCR2_0'

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)

model = AutoModel.from_pretrained(
    _MODEL_ID,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cuda',
    use_safetensors=True,
    # Use the tokenizer's end-of-sequence id as the padding id.
    pad_token_id=tokenizer.eos_token_id,
)
# Switch to evaluation mode and move the model to the CUDA device.
model = model.eval().cuda()
def extract_text_from_image(image):
    """Run OCR on an uploaded image and return the recognised text.

    ``model.chat`` is called with a file path, so the image is first written
    to a temporary JPEG file that is removed again before returning.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component, or
            ``None`` when no image has been uploaded.

    Returns:
        The text returned by ``model.chat`` with ``ocr_type='ocr'``, or an
        empty string when ``image`` is ``None``.
    """
    if image is None:
        # Nothing to recognise; avoid an AttributeError on ``None.save``.
        return ""

    # JPEG cannot store alpha or palette modes (e.g. an RGBA PNG upload),
    # so normalise to RGB before saving.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Use a unique path per call: a fixed file name would let concurrent
    # requests overwrite each other's image.
    fd, image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        image.save(image_path)
        return model.chat(tokenizer, image_path, ocr_type='ocr')
    finally:
        # Always clean up, even when saving or OCR raises.
        os.remove(image_path)
def search_and_highlight_keyword(extracted_text, keyword):
    """Return HTML showing the lines of ``extracted_text`` that contain ``keyword``.

    Matching is case-insensitive and literal (the keyword is not treated as a
    regular expression). Every occurrence is wrapped in a styled ``<mark>``
    element. All text taken from the inputs is HTML-escaped, so OCR output or
    a keyword containing ``<``, ``>`` or ``&`` is displayed verbatim instead
    of being interpreted as markup.

    Args:
        extracted_text: Text to search; ``None`` is treated as empty text.
        keyword: Substring to look for.

    Returns:
        An HTML string: the matching lines joined with ``<br>``, or a ``<p>``
        message when the keyword is empty or does not occur in the text.
    """
    if not keyword:
        return "<p>Please provide a keyword for searching.</p>"

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    def highlight_line(line):
        # Escape the text between matches and the matched text separately so
        # the only markup in the result is the <mark> wrapper added here.
        pieces = []
        cursor = 0
        for match in pattern.finditer(line):
            pieces.append(html.escape(line[cursor:match.start()]))
            # Custom background color and text color for highlighting
            pieces.append(
                "<mark style='background-color: #ffcc00; color: black;'>"
                f"{html.escape(match.group(0))}</mark>"
            )
            cursor = match.end()
        pieces.append(html.escape(line[cursor:]))
        return "".join(pieces)

    highlighted_text = [
        highlight_line(line)
        for line in (extracted_text or "").splitlines()
        if pattern.search(line)
    ]

    if highlighted_text:
        return '<br>'.join(highlighted_text)
    return f"<p>Keyword '{html.escape(keyword)}' not found in the text.</p>"
# Build the Gradio UI: an OCR section (image -> extracted text) followed by a
# keyword-search section that works on the extracted text.
with gr.Blocks() as demo:
    # Components are created in display order.
    gr.Markdown("# OCR and Keyword Search App with Highlighting")

    image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
    text_output = gr.Textbox(
        label="Extracted Text",
        placeholder="Text will appear here after OCR.",
    )
    extract_button = gr.Button("Extract Text")

    keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
    search_result = gr.HTML(label="Highlighted Text with Keyword")
    search_button = gr.Button("Search and Highlight Keyword")

    # Event wiring: OCR fills the text box; the search reads that text box
    # together with the keyword and renders the highlighted HTML.
    extract_button.click(
        fn=extract_text_from_image,
        inputs=image_input,
        outputs=text_output,
    )
    search_button.click(
        fn=search_and_highlight_keyword,
        inputs=[text_output, keyword_input],
        outputs=search_result,
    )

# Start the app; share=True requests a public share link.
demo.launch(share=True)
|