omvishesh committed on
Commit
51d22a6
·
verified ·
1 Parent(s): 89decca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from transformers import AutoModel, AutoTokenizer
3
  import os
4
- import re # Import regular expressions module
5
 
6
  # Load the OCR model and tokenizer
7
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
@@ -12,47 +12,47 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0',
12
  use_safetensors=True,
13
  pad_token_id=tokenizer.eos_token_id).eval().cuda()
14
 
15
- # Define the function to process images and extract text
16
  def extract_text_from_image(image):
17
- # Save the uploaded image temporarily
18
  image_path = "temp_image.jpg"
19
  image.save(image_path)
20
 
21
- # Call the model to perform OCR
22
  extracted_text = model.chat(tokenizer, image_path, ocr_type='ocr')
23
 
24
- # Remove the temporary image file
25
  os.remove(image_path)
26
 
27
  return extracted_text
28
 
29
- # Function to search for the keyword in extracted text and highlight it
30
  def search_and_highlight_keyword(extracted_text, keyword):
31
  if not keyword:
32
  return "<p>Please provide a keyword for searching.</p>"
33
 
34
- # Case-insensitive search and replace keyword with <mark> tag for highlighting
35
  def highlight(match):
36
  # Custom background color and text color for highlighting
37
  return f"<mark style='background-color: #ffcc00; color: black;'>{match.group(0)}</mark>"
38
 
39
- # Use regular expression to find the keyword in a case-insensitive manner
40
  pattern = re.compile(re.escape(keyword), re.IGNORECASE)
41
 
42
  highlighted_text = []
43
- for line in extracted_text.splitlines(): # Split text into lines
44
- if re.search(pattern, line): # If keyword is found in the line
45
- highlighted_line = re.sub(pattern, highlight, line) # Highlight keyword
46
  highlighted_text.append(highlighted_line)
47
 
48
  if highlighted_text:
49
- return '<br>'.join(highlighted_text) # Join the lines with HTML <br> for line breaks
50
  else:
51
  return f"<p>Keyword '{keyword}' not found in the text.</p>"
52
 
53
  # Gradio interface components
54
  with gr.Blocks() as demo:
55
- # Image upload and OCR
56
  gr.Markdown("# OCR and Keyword Search App with Highlighting")
57
 
58
  image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
@@ -64,7 +64,7 @@ with gr.Blocks() as demo:
64
  inputs=image_input,
65
  outputs=text_output)
66
 
67
- # Keyword search and highlight
68
  keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
69
  search_result = gr.HTML(label="Highlighted Text with Keyword")
70
 
@@ -74,5 +74,5 @@ with gr.Blocks() as demo:
74
  inputs=[text_output, keyword_input],
75
  outputs=search_result)
76
 
77
- # Launch the Gradio app
78
  demo.launch(share=True)
 
1
  import gradio as gr
2
  from transformers import AutoModel, AutoTokenizer
3
  import os
4
+ import re
5
 
6
  # Load the OCR model and tokenizer
7
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 
12
  use_safetensors=True,
13
  pad_token_id=tokenizer.eos_token_id).eval().cuda()
14
 
15
+
16
  def extract_text_from_image(image):
17
+
18
  image_path = "temp_image.jpg"
19
  image.save(image_path)
20
 
21
+
22
  extracted_text = model.chat(tokenizer, image_path, ocr_type='ocr')
23
 
24
+
25
  os.remove(image_path)
26
 
27
  return extracted_text
28
 
29
+
30
  def search_and_highlight_keyword(extracted_text, keyword):
31
  if not keyword:
32
  return "<p>Please provide a keyword for searching.</p>"
33
 
34
+
35
  def highlight(match):
36
  # Custom background color and text color for highlighting
37
  return f"<mark style='background-color: #ffcc00; color: black;'>{match.group(0)}</mark>"
38
 
39
+
40
  pattern = re.compile(re.escape(keyword), re.IGNORECASE)
41
 
42
  highlighted_text = []
43
+ for line in extracted_text.splitlines():
44
+ if re.search(pattern, line):
45
+ highlighted_line = re.sub(pattern, highlight, line)
46
  highlighted_text.append(highlighted_line)
47
 
48
  if highlighted_text:
49
+ return '<br>'.join(highlighted_text)
50
  else:
51
  return f"<p>Keyword '{keyword}' not found in the text.</p>"
52
 
53
  # Gradio interface components
54
  with gr.Blocks() as demo:
55
+
56
  gr.Markdown("# OCR and Keyword Search App with Highlighting")
57
 
58
  image_input = gr.Image(type="pil", label="Upload an Image (JPEG format)")
 
64
  inputs=image_input,
65
  outputs=text_output)
66
 
67
+
68
  keyword_input = gr.Textbox(label="Enter Keyword to Search and Highlight")
69
  search_result = gr.HTML(label="Highlighted Text with Keyword")
70
 
 
74
  inputs=[text_output, keyword_input],
75
  outputs=search_result)
76
 
77
+
78
  demo.launch(share=True)