yashbyname committed on
Commit
945b7f0
·
verified ·
1 Parent(s): 23f6833

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -49
app.py CHANGED
@@ -1,69 +1,54 @@
1
  # -*- coding: utf-8 -*-
2
- """Final WebApp using Gradio.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1a5-p_KZd9Hk0tsKZ_JoqoYeRD3XOQtRK
8
-
9
- # **Task 2 - Web App Development with Gradio**
10
-
11
- ## **Gradio Interface for OCR Application**
12
-
13
- In this notebook, I created an interactive web application using Gradio to facilitate the OCR process and allow users to perform keyword searches on the extracted text.
14
  """
15
-
16
- #!pip install gradio
17
- #!pip install -q tiktoken verovio
18
- #!pip install pytesseract
19
-
20
- """**Library Imports**:
21
-
22
- - In addition to libraries from the first notebook, I imported `gradio` to build the user interface for the application.
23
  """
24
 
 
25
  import cv2
 
26
  from pytesseract import pytesseract
27
  from transformers import AutoModel, AutoTokenizer
28
  import gradio as gr
 
 
29
 
30
- """**Model and Tesseract Configuration**:
31
- - Similar to the first notebook, I loaded the GOT2 model for English text and configured Tesseract for Hindi text.
32
- """
33
 
 
34
  tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
35
- model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).eval()
36
 
 
37
  pytesseract.tesseract_cmd = '/usr/bin/tesseract'
38
  tesseract_config = '--oem 3 --psm 6 -l hin'
39
 
40
- """**Perform OCR Function**:
41
- - The `perform_ocr` function was adapted to handle image input from the Gradio interface. This function processes the uploaded image based on the selected language and returns the extracted English and Hindi texts.
42
- """
43
-
44
  def perform_ocr(img, language):
45
-
46
- img_path = "/tmp/uploaded_image.png"
47
- img.save(img_path)
 
48
 
49
  res_eng = ""
50
  res_hin = ""
51
 
52
  if language in ["English", "Both"]:
53
- res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')
 
 
54
 
55
  if language in ["Hindi", "Both"]:
56
  img_cv = cv2.imread(img_path)
57
  res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
58
 
59
- return res_eng, res_hin
 
60
 
61
- """**Keyword Search Functionality**:
62
- - A new function, `ocr_and_search`, was implemented to allow users to search for keywords within the extracted text. It checks for keyword matches in both English and Hindi texts, providing appropriate feedback.
63
- """
64
 
 
65
  def ocr_and_search(image, language, keyword):
66
-
67
  english_text, hindi_text = perform_ocr(image, language)
68
 
69
  extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
@@ -72,10 +57,8 @@ def ocr_and_search(image, language, keyword):
72
  # Search for the keyword in the extracted text
73
  search_results = []
74
  if keyword:
75
-
76
  if language in ["English", "Both"] and keyword.lower() in english_text.lower():
77
  search_results.append(f"Keyword '{keyword}' found in English text.")
78
-
79
  if language in ["Hindi", "Both"] and keyword.lower() in hindi_text.lower():
80
  search_results.append(f"Keyword '{keyword}' found in Hindi text.")
81
 
@@ -83,12 +66,7 @@ def ocr_and_search(image, language, keyword):
83
 
84
  return extracted_english, extracted_hindi, search_output
85
 
86
- """**Gradio Interface Setup**:
87
- - The user interface is constructed using Gradio's Blocks API, allowing users to upload images, select the desired language for OCR, and enter a keyword for search.
88
- - The outputs are displayed in separate text boxes for extracted English text, extracted Hindi text, and search results.
89
- """
90
-
91
- # Gradio
92
  with gr.Blocks() as app:
93
  gr.Markdown("### OCR Application")
94
  image_input = gr.Image(type="pil", label="Upload Image")
@@ -101,8 +79,5 @@ with gr.Blocks() as app:
101
  submit_button = gr.Button("Submit")
102
  submit_button.click(fn=ocr_and_search, inputs=[image_input, language_selection, keyword_input], outputs=[output_english, output_hindi, output_search])
103
 
104
- """**Application Launch**:
105
- - Finally, the Gradio app is launched, making the OCR application accessible for user interaction. This enables real-time testing and usability of the OCR functionalities implemented in the previous notebook.
106
- """
107
-
108
  app.launch()
 
1
  # -*- coding: utf-8 -*-
 
 
 
 
 
 
 
 
 
 
 
 
2
  """
3
+ Final WebApp using Gradio
 
 
 
 
 
 
 
4
  """
5
 
6
+ # Required Libraries
7
  import cv2
8
+ import torch
9
  from pytesseract import pytesseract
10
  from transformers import AutoModel, AutoTokenizer
11
  import gradio as gr
12
+ import tempfile
13
+ import os
14
 
15
# Select the compute device: CUDA when torch reports it, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# GOT-OCR 2.0 tokenizer and model (English OCR path); the model is moved to
# the selected device and switched to evaluation mode
tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model_eng = model_eng.to(device)
model_eng = model_eng.eval()

# Tesseract binary location and the options passed to it for Hindi OCR
pytesseract.tesseract_cmd = '/usr/bin/tesseract'
tesseract_config = '--oem 3 --psm 6 -l hin'
25
 
26
# OCR function for both English and Hindi
def perform_ocr(img, language):
    """Run OCR on an uploaded image for the selected language(s).

    Args:
        img: Image object exposing ``save(path)``; the Gradio input in this
            app is declared with ``type="pil"``. ``None`` (nothing uploaded)
            is accepted and yields empty results instead of raising.
        language: ``"English"``, ``"Hindi"`` or ``"Both"``. Any other value
            runs neither OCR engine.

    Returns:
        A ``(res_eng, res_hin)`` tuple: the output of ``model_eng.chat`` and
        of ``pytesseract.image_to_string`` respectively, or ``""`` for an
        engine that was not run.
    """
    # Nothing was uploaded: there is no image to save or recognise.
    if img is None:
        return "", ""

    # Reserve a unique path, then close the handle right away so the file is
    # not held open while it is written and re-read by path below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_img:
        img_path = temp_img.name

    res_eng = ""
    res_hin = ""

    try:
        img.save(img_path)

        if language in ["English", "Both"]:
            # Inference only: skip gradient tracking.
            with torch.no_grad():
                res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')

        if language in ["Hindi", "Both"]:
            img_cv = cv2.imread(img_path)
            res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
    finally:
        # delete=False means nothing else removes the file; clean it up even
        # when saving or either OCR engine raises.
        os.remove(img_path)

    return res_eng, res_hin
 
 
49
 
50
+ # Keyword Search Functionality
51
  def ocr_and_search(image, language, keyword):
 
52
  english_text, hindi_text = perform_ocr(image, language)
53
 
54
  extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
 
57
  # Search for the keyword in the extracted text
58
  search_results = []
59
  if keyword:
 
60
  if language in ["English", "Both"] and keyword.lower() in english_text.lower():
61
  search_results.append(f"Keyword '{keyword}' found in English text.")
 
62
  if language in ["Hindi", "Both"] and keyword.lower() in hindi_text.lower():
63
  search_results.append(f"Keyword '{keyword}' found in Hindi text.")
64
 
 
66
 
67
  return extracted_english, extracted_hindi, search_output
68
 
69
+ # Gradio Interface Setup
 
 
 
 
 
70
  with gr.Blocks() as app:
71
  gr.Markdown("### OCR Application")
72
  image_input = gr.Image(type="pil", label="Upload Image")
 
79
  submit_button = gr.Button("Submit")
80
  submit_button.click(fn=ocr_and_search, inputs=[image_input, language_selection, keyword_input], outputs=[output_english, output_hindi, output_search])
81
 
82
+ # Launch the Gradio app
 
 
 
83
  app.launch()