yashbyname committed on
Commit
cb39282
·
verified ·
1 Parent(s): 53f01c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -52
app.py CHANGED
@@ -1,60 +1,40 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Final WebApp using Gradio
4
- """
5
 
6
- # Required Libraries
7
  import cv2
8
- import torch
9
  from pytesseract import pytesseract
10
  from transformers import AutoModel, AutoTokenizer
11
  import gradio as gr
12
- import tempfile
13
- import os
14
 
15
- # Check if GPU is available
16
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
17
-
18
- # Load models for OCR
19
- tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
20
- model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).to(device).eval()
21
-
22
- # Tesseract configuration for Hindi OCR
23
- pytesseract.tesseract_cmd = '/usr/bin/tesseract'
24
- tesseract_config = '--oem 3 --psm 6 -l hin'
25
-
26
- # OCR function for both English and Hindi
27
- def perform_ocr(img, language):
28
- # Use a temporary file for the uploaded image
29
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_img:
30
- img.save(temp_img.name)
31
- img_path = temp_img.name
32
 
33
  res_eng = ""
34
  res_hin = ""
35
 
36
  if language in ["English", "Both"]:
37
- # Ensure that inference is done on the correct device (GPU or CPU)
38
- with torch.no_grad():
39
- # Move inputs to the appropriate device
40
- try:
41
- res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')
42
- except Exception as e:
43
- print(f"Error during English OCR: {e}")
44
- res_eng = "Error during English OCR"
45
 
46
  if language in ["Hindi", "Both"]:
47
  img_cv = cv2.imread(img_path)
48
  res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
49
 
50
- # Cleanup temporary file
51
- os.remove(img_path)
52
-
53
  return res_eng, res_hin
54
 
55
  # Keyword Search Functionality
56
- def ocr_and_search(image, language, keyword):
57
- english_text, hindi_text = perform_ocr(image, language)
58
 
59
  extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
60
  extracted_hindi = f"Extracted Hindi Text:\n{hindi_text}" if hindi_text else "No Hindi text extracted."
@@ -64,6 +44,7 @@ def ocr_and_search(image, language, keyword):
64
  if keyword:
65
  if language in ["English", "Both"] and keyword.lower() in english_text.lower():
66
  search_results.append(f"Keyword '{keyword}' found in English text.")
 
67
  if language in ["Hindi", "Both"] and keyword.lower() in hindi_text.lower():
68
  search_results.append(f"Keyword '{keyword}' found in Hindi text.")
69
 
@@ -72,17 +53,25 @@ def ocr_and_search(image, language, keyword):
72
  return extracted_english, extracted_hindi, search_output
73
 
74
  # Gradio Interface Setup
75
- with gr.Blocks() as app:
76
- gr.Markdown("### OCR Application")
77
- image_input = gr.Image(type="pil", label="Upload Image")
78
- language_selection = gr.Radio(choices=["English", "Hindi", "Both"], label="Select Language")
79
- keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")
80
- output_english = gr.Textbox(label="Extracted English Text", interactive=False)
81
- output_hindi = gr.Textbox(label="Extracted Hindi Text", interactive=False)
82
- output_search = gr.Textbox(label="Search Results", interactive=False)
83
-
84
- submit_button = gr.Button("Submit")
85
- submit_button.click(fn=ocr_and_search, inputs=[image_input, language_selection, keyword_input], outputs=[output_english, output_hindi, output_search])
86
-
87
- # Launch the Gradio app
88
- app.launch()
 
 
 
 
 
 
 
 
 
1
+ # app.py
 
 
 
2
 
 
3
  import cv2
 
4
  from pytesseract import pytesseract
5
  from transformers import AutoModel, AutoTokenizer
6
  import gradio as gr
 
 
7
 
8
+ # Model and Tesseract Configuration
9
def load_models():
    """Load the English OCR model and tokenizer and set up Tesseract for Hindi.

    The model is placed on the GPU when one is available and on the CPU
    otherwise, so loading no longer fails on CPU-only hosts (the Hindi
    path only needs Tesseract).
    NOTE(review): whether the remote ``GOT-OCR2_0`` code can actually run
    inference on CPU is not visible from this file -- confirm.

    Returns:
        tuple: ``(tokenizer_eng, model_eng, tesseract_config)`` where
        ``tesseract_config`` is the option string later passed to
        ``pytesseract.image_to_string``.
    """
    # Local import: the file's import block does not bring torch into scope.
    import torch

    tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
    model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).eval()

    # Pick the device at runtime instead of calling .cuda() unconditionally.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_eng = model_eng.to(device)

    # Point pytesseract at the Tesseract binary and select the Hindi language data.
    pytesseract.tesseract_cmd = '/usr/bin/tesseract'
    tesseract_config = '--oem 3 --psm 6 -l hin'

    return tokenizer_eng, model_eng, tesseract_config
17
+
18
+ # Perform OCR Function
19
def _default_tokenizer():
    """Return the GOT-OCR2_0 tokenizer, loading it on first use and caching it."""
    cached = getattr(_default_tokenizer, "_cached", None)
    if cached is None:
        cached = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
        _default_tokenizer._cached = cached
    return cached


def perform_ocr(img, language, model_eng, tesseract_config, tokenizer_eng=None):
    """Run English and/or Hindi OCR on an uploaded image.

    Args:
        img: Image object exposing ``save(path)`` (the UI supplies a PIL
            image). ``None`` (nothing uploaded) yields two empty strings.
        language: ``"English"``, ``"Hindi"`` or ``"Both"``; any other value
            runs no OCR engine.
        model_eng: Model whose ``chat(tokenizer, path, ocr_type='ocr')``
            performs the English OCR.
        tesseract_config: Option string forwarded to
            ``pytesseract.image_to_string`` for the Hindi OCR.
        tokenizer_eng: Tokenizer handed to ``model_eng.chat``. When omitted,
            the GOT-OCR2_0 tokenizer is loaded once and reused; previously
            the name was undefined here and the English path raised
            ``NameError``.

    Returns:
        tuple[str, str]: ``(res_eng, res_hin)``; a language that was not
        requested is returned as ``""``.
    """
    # Local imports: the file's import block does not provide these modules.
    import os
    import tempfile

    res_eng = ""
    res_hin = ""

    if img is None:
        return res_eng, res_hin

    # A unique file per call avoids concurrent requests overwriting one
    # shared fixed path; the file is removed again in the ``finally`` below.
    fd, img_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        img.save(img_path)

        if language in ["English", "Both"]:
            if tokenizer_eng is None:
                tokenizer_eng = _default_tokenizer()
            res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')

        if language in ["Hindi", "Both"]:
            img_cv = cv2.imread(img_path)
            res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
    finally:
        os.remove(img_path)

    return res_eng, res_hin
34
 
35
  # Keyword Search Functionality
36
+ def ocr_and_search(image, language, keyword, model_eng, tesseract_config):
37
+ english_text, hindi_text = perform_ocr(image, language, model_eng, tesseract_config)
38
 
39
  extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
40
  extracted_hindi = f"Extracted Hindi Text:\n{hindi_text}" if hindi_text else "No Hindi text extracted."
 
44
  if keyword:
45
  if language in ["English", "Both"] and keyword.lower() in english_text.lower():
46
  search_results.append(f"Keyword '{keyword}' found in English text.")
47
+
48
  if language in ["Hindi", "Both"] and keyword.lower() in hindi_text.lower():
49
  search_results.append(f"Keyword '{keyword}' found in Hindi text.")
50
 
 
53
  return extracted_english, extracted_hindi, search_output
54
 
55
  # Gradio Interface Setup
56
def create_interface(model_eng, tesseract_config):
    """Build the Gradio Blocks UI for OCR plus keyword search.

    Args:
        model_eng: English OCR model forwarded to ``ocr_and_search`` on
            every submit.
        tesseract_config: Tesseract option string forwarded to
            ``ocr_and_search`` on every submit.

    Returns:
        The constructed ``gr.Blocks`` app; it is not launched here.
    """
    def run_ocr_and_search(image, language, keyword):
        # The UI supplies only three values, while ocr_and_search also needs
        # the model and Tesseract config; bind them here so the click
        # handler is called with the right number of arguments.
        return ocr_and_search(image, language, keyword, model_eng, tesseract_config)

    with gr.Blocks() as app:
        gr.Markdown("### OCR Application")
        image_input = gr.Image(type="pil", label="Upload Image")
        language_selection = gr.Radio(choices=["English", "Hindi", "Both"], label="Select Language")
        keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")
        output_english = gr.Textbox(label="Extracted English Text", interactive=False)
        output_hindi = gr.Textbox(label="Extracted Hindi Text", interactive=False)
        output_search = gr.Textbox(label="Search Results", interactive=False)

        submit_button = gr.Button("Submit")
        submit_button.click(
            fn=run_ocr_and_search,
            inputs=[image_input, language_selection, keyword_input],
            outputs=[output_english, output_hindi, output_search],
        )

    return app
70
+
71
def main():
    """Load the OCR resources, build the Gradio interface and launch it."""
    # load_models() also returns the tokenizer; it is not passed on from here.
    _tokenizer_eng, ocr_model, hindi_config = load_models()
    create_interface(ocr_model, hindi_config).launch()


if __name__ == "__main__":
    main()