Update app.py
Browse files
app.py
CHANGED
@@ -1,67 +1,41 @@
|
|
1 |
-
import cv2
|
2 |
-
from pytesseract import pytesseract
|
3 |
-
from transformers import AutoModel, AutoTokenizer
|
4 |
import gradio as gr
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
# Load GOT2 model for English text and configure Tesseract for Hindi text
|
8 |
-
tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
|
9 |
-
model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map='cpu').eval()
|
10 |
-
|
11 |
-
# Define Tesseract path and configuration for Hindi
|
12 |
-
pytesseract.tesseract_cmd = '/usr/bin/tesseract'
|
13 |
-
tesseract_config = '--oem 3 --psm 6 -l hin'
|
14 |
-
|
15 |
-
# Perform OCR function
|
16 |
-
def perform_ocr(img, language):
|
17 |
-
img_path = "/tmp/uploaded_image.png"
|
18 |
-
img.save(img_path)
|
19 |
-
|
20 |
-
res_eng = ""
|
21 |
-
res_hin = ""
|
22 |
-
|
23 |
-
if language in ["English", "Both"]:
|
24 |
-
res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')
|
25 |
-
|
26 |
-
if language in ["Hindi", "Both"]:
|
27 |
-
img_cv = cv2.imread(img_path)
|
28 |
-
res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
|
29 |
-
|
30 |
-
return res_eng, res_hin
|
31 |
-
|
32 |
-
# Keyword Search Functionality
|
33 |
-
def ocr_and_search(image, language, keyword):
|
34 |
-
english_text, hindi_text = perform_ocr(image, language)
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
66 |
if __name__ == "__main__":
|
67 |
-
|
|
|
|
|
|
|
|
|
1 |
import shutil

import gradio as gr
import torch
import pytesseract
from transformers import AutoTokenizer, AutoModel

# Locate the Tesseract binary wherever it is on PATH so the app runs on any
# platform (Linux containers, Windows, macOS). The original hard-coded
# Homebrew path only works on Apple-Silicon macOS; keep it as a fallback.
pytesseract.pytesseract.tesseract_cmd = (
    shutil.which('tesseract') or r'/opt/homebrew/bin/tesseract'
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
# Load the tokenizer and model
# GOT-OCR2 ships its own modeling code on the Hub, so trust_remote_code=True is
# required — NOTE: this executes repository code at load time; only acceptable
# because 'ucaslcl/GOT-OCR2_0' is a known, pinned-by-name repo.
tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
# .eval() disables dropout/batch-norm training behavior for inference.
model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).eval()
|
11 |
+
|
12 |
+
def perform_ocr(image, language):
    """Run OCR on *image* and return the recognized text.

    Parameters
    ----------
    image : numpy.ndarray
        Image as delivered by the Gradio ``gr.Image(type="numpy")`` input.
    language : str
        ``"Hindi"`` routes to Tesseract with the Devanagari (``hin``)
        language pack; any other value (``"English"``) uses the GOT-OCR2
        model.  Previously this argument was silently ignored and the
        English model was always used, so the "Hindi" dropdown choice
        returned wrong results.

    Returns
    -------
    str
        The extracted text.
    """
    if language == "Hindi":
        # pytesseract accepts numpy arrays directly; --oem 3 (default engine)
        # and --psm 6 (uniform text block) match the previous revision's config.
        return pytesseract.image_to_string(image, config='--oem 3 --psm 6 -l hin')

    # English (default): GOT-OCR2 model.
    # NOTE(review): GOT-OCR2's chat() is documented to take an image *file
    # path*; the original code passed the raw array — confirm this works, or
    # persist the array to a temp PNG first.
    return model_eng.chat(tokenizer_eng, image, ocr_type='ocr')
|
18 |
+
|
19 |
+
def ocr_and_search(image, language):
    """Extract text from *image* and return it.

    Currently a thin pass-through around :func:`perform_ocr`; keyword
    search over the extracted text is not implemented yet.
    """
    extracted_text = perform_ocr(image, language)
    # TODO: add keyword-search / highlighting over extracted_text here.
    return extracted_text
|
26 |
+
|
27 |
+
# Create Gradio interface
# Wires ocr_and_search to a two-input form: the uploaded image (delivered as a
# numpy array, matching what perform_ocr expects) and a language selector.
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Dropdown(choices=["English", "Hindi"], label="Select Language")
    ],
    outputs=gr.Textbox(label="Extracted Text"),
    title="OCR Application",
    description="Upload an image to extract text using OCR."
)
|
38 |
+
|
39 |
+
# Run the app only when executed as a script (not when imported as a module).
if __name__ == "__main__":
    iface.launch()
|