yashbyname committed on
Commit
add4d26
·
verified ·
1 Parent(s): a1883c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -16
app.py CHANGED
@@ -2,35 +2,49 @@ import gradio as gr
2
  import torch
3
  import pytesseract
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
5
 
6
  # Load models and tokenizers
7
- tokenizer_eng = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0")
8
- tokenizer_hin = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0")
9
- model_eng = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0")
10
- model_hin = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0")
11
 
12
- def perform_ocr(image, language):
 
13
  # Set device to CPU or GPU
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  model_eng.to(device)
17
  model_hin.to(device)
18
 
19
- # Process the image using the appropriate model
20
- img_path = "path/to/your/image" # Set this path appropriately
21
- res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')
22
- img_cv = cv2.imread(img_path)
23
-
 
 
 
24
  # Use pytesseract for Hindi OCR
25
  tesseract_config = '--psm 6'
26
- res_hin = pytesseract.image_to_string(img_cv, lang='hin', config=tesseract_config)
27
 
28
- return res_eng, res_hin
29
 
30
- def ocr_and_search(image, language):
31
- english_text, hindi_text = perform_ocr(image, language)
 
32
  return english_text, hindi_text
33
 
34
- # Gradio
35
- iface = gr.Interface(fn=ocr_and_search, inputs=["image", "dropdown"], outputs=["text", "text"])
 
 
 
 
 
 
 
 
36
  iface.launch()
 
2
  import torch
3
  import pytesseract
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ import cv2 # Ensure you have OpenCV installed
6
 
7
  # Load models and tokenizers
8
+ tokenizer_eng = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
9
+ tokenizer_hin = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
10
+ model_eng = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
11
+ model_hin = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
12
 
13
+ def perform_ocr(image):
14
+ """Perform OCR on the image for both English and Hindi."""
15
  # Set device to CPU or GPU
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
 
18
  model_eng.to(device)
19
  model_hin.to(device)
20
 
21
+ # Convert the input image to an OpenCV format
22
+ img_cv = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Convert to BGR for OpenCV
23
+ img_path = "temp_image.jpg" # Temporary path to save the image
24
+ cv2.imwrite(img_path, img_cv) # Save the image temporarily
25
+
26
+ # Use pytesseract for English OCR
27
+ english_text = pytesseract.image_to_string(img_cv)
28
+
29
  # Use pytesseract for Hindi OCR
30
  tesseract_config = '--psm 6'
31
+ hindi_text = pytesseract.image_to_string(img_cv, lang='hin', config=tesseract_config)
32
 
33
+ return english_text, hindi_text
34
 
35
+ def ocr_and_search(image):
36
+ """Process the image and extract text in both languages."""
37
+ english_text, hindi_text = perform_ocr(image)
38
  return english_text, hindi_text
39
 
40
+ # Gradio interface
41
+ iface = gr.Interface(
42
+ fn=ocr_and_search,
43
+ inputs=gr.inputs.Image(type="numpy"), # Use numpy array for OpenCV
44
+ outputs=["text", "text"],
45
+ title="OCR for English and Hindi",
46
+ description="Upload an image to extract text in English and Hindi."
47
+ )
48
+
49
+ # Launch the interface
50
  iface.launch()