Update app.py
Browse files
app.py
CHANGED
@@ -2,35 +2,49 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import pytesseract
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
5 |
|
6 |
# Load models and tokenizers
|
7 |
-
tokenizer_eng = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0")
|
8 |
-
tokenizer_hin = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0")
|
9 |
-
model_eng = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0")
|
10 |
-
model_hin = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0")
|
11 |
|
12 |
-
def perform_ocr(image
|
|
|
13 |
# Set device to CPU or GPU
|
14 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
15 |
|
16 |
model_eng.to(device)
|
17 |
model_hin.to(device)
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
24 |
# Use pytesseract for Hindi OCR
|
25 |
tesseract_config = '--psm 6'
|
26 |
-
|
27 |
|
28 |
-
return
|
29 |
|
30 |
-
def ocr_and_search(image
|
31 |
-
|
|
|
32 |
return english_text, hindi_text
|
33 |
|
34 |
-
# Gradio
|
35 |
-
iface = gr.Interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
iface.launch()
|
|
|
2 |
import torch
|
3 |
import pytesseract
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
5 |
+
import cv2 # Ensure you have OpenCV installed
|
6 |
|
7 |
# Load models and tokenizers.
# Both the "English" and "Hindi" names refer to the same checkpoint, so it is
# loaded once and shared; loading it twice only doubles memory and start-up
# time without changing behaviour.
# NOTE(security): trust_remote_code=True executes Python code shipped in the
# model repository. Consider pinning a reviewed `revision=` for this checkpoint.
GOT_OCR_MODEL_ID = "ucaslcl/GOT-OCR2_0"

tokenizer_eng = AutoTokenizer.from_pretrained(GOT_OCR_MODEL_ID, trust_remote_code=True)
tokenizer_hin = tokenizer_eng
model_eng = AutoModelForCausalLM.from_pretrained(GOT_OCR_MODEL_ID, trust_remote_code=True)
model_hin = model_eng
|
12 |
|
13 |
+
def perform_ocr(image):
    """Run Tesseract OCR on an image for both English and Hindi.

    Args:
        image: RGB image as a NumPy array (the Gradio image input is
            configured with ``type="numpy"``), or ``None`` when no image
            was supplied.

    Returns:
        A ``(english_text, hindi_text)`` tuple of strings. Both strings are
        empty when ``image`` is ``None``.
    """
    # Gradio invokes the handler with None when nothing was uploaded;
    # return empty results instead of crashing inside the OCR calls.
    if image is None:
        return "", ""

    # Set device to CPU or GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # The Tesseract calls below do not use these models; the placement is
    # kept so the module-level models end up on the same device as before.
    model_eng.to(device)
    model_hin.to(device)

    # pytesseract interprets NumPy input as RGB, which is what Gradio
    # provides, so the array is passed through unchanged. The previous
    # RGB->BGR conversion swapped channels, and the temporary JPEG written
    # to the working directory was never read back (and was clobbered by
    # concurrent requests), so both were dropped.

    # Use pytesseract for English OCR
    english_text = pytesseract.image_to_string(image)

    # Use pytesseract for Hindi OCR
    tesseract_config = '--psm 6'
    hindi_text = pytesseract.image_to_string(image, lang='hin', config=tesseract_config)

    return english_text, hindi_text
|
34 |
|
35 |
+
def ocr_and_search(image):
    """Gradio handler: OCR the image and return its English and Hindi text.

    Delegates to ``perform_ocr`` and returns its two results as a
    ``(english, hindi)`` pair.
    """
    extracted = perform_ocr(image)
    text_en, text_hi = extracted
    return text_en, text_hi
|
39 |
|
40 |
+
# Gradio interface: one image input, two text outputs (English, Hindi).
iface = gr.Interface(
    fn=ocr_and_search,
    # `gr.inputs.Image` is the legacy namespace (deprecated in Gradio 3,
    # removed in Gradio 4); `gr.Image` is the supported component.
    # type="numpy" hands the handler an RGB NumPy array.
    inputs=gr.Image(type="numpy"),
    outputs=["text", "text"],
    title="OCR for English and Hindi",
    description="Upload an image to extract text in English and Hindi.",
)

# Launch the interface
iface.launch()
|