llmat committed on
Commit
faf3b9f
·
verified ·
1 Parent(s): 6b27525

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -56
app.py CHANGED
@@ -1,71 +1,91 @@
1
  import gradio as gr
2
  import cv2
3
- import easyocr
4
- from PIL import Image
 
 
 
 
 
 
5
 
6
- # Functions for OCR steps
7
def get_grayscale(image):
    """Return a single-channel grayscale copy of a colour image.

    Args:
        image: Image array handed to ``cv2.cvtColor``; the conversion code
            used treats the channels as BGR-ordered.

    Returns:
        The result of the ``cv2.COLOR_BGR2GRAY`` conversion.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray
9
 
10
def thresholding(src):
    """Apply a fixed to-zero threshold at intensity 127.

    Args:
        src: Image array passed to ``cv2.threshold``.

    Returns:
        The thresholded image (second element of the ``cv2.threshold``
        result), produced with ``cv2.THRESH_TOZERO``.
    """
    # cv2.threshold returns (threshold value used, output image); only the
    # image is of interest here.
    _, result = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return result
 
 
 
12
 
13
def ocr_with_easy(img):
    """Run EasyOCR (English) on an image and return the recognised text.

    Args:
        img: Image input accepted by ``easyocr.Reader.readtext``; the caller
            in this file passes the path of the preprocessed image.

    Returns:
        The recognised text fragments joined by single spaces.
    """
    # Building a Reader loads the recognition models, so it is created once
    # and kept on the function object instead of being rebuilt per request.
    reader = getattr(ocr_with_easy, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en'])
        ocr_with_easy._reader = reader

    # `paragraph` must be a real boolean: the string "False" is truthy and
    # silently switched paragraph mode on.
    fragments = reader.readtext(img, paragraph=False, detail=0)

    # Join with a space so separately detected fragments do not run together.
    return ' '.join(fragments)
 
18
 
19
def process_image(img, steps):
    """Apply the selected preprocessing steps in order and save the result.

    Args:
        img: Image array to preprocess.
        steps: Iterable of step names. "Grayscale Conversion" and
            "Thresholding" are recognised; any other name leaves the image
            unchanged.

    Returns:
        The path ``'processed_image.png'`` the final image was written to.
    """
    def _apply(name, frame):
        # Resolve one step name to its transformation.
        if name == "Grayscale Conversion":
            return get_grayscale(frame)
        if name == "Thresholding":
            return thresholding(frame)
        return frame

    current = img
    for name in steps:
        current = _apply(name, current)

    destination = 'processed_image.png'
    cv2.imwrite(destination, current)
    return destination
27
-
28
def generate_ocr(img, steps):
    """Preprocess an uploaded image and return the text recognised in it.

    Args:
        img: Uploaded image array, or ``None`` when nothing was uploaded.
        steps: Preprocessing step names forwarded to ``process_image``.

    Returns:
        The text returned by ``ocr_with_easy`` for the preprocessed image.

    Raises:
        gr.Error: If ``img`` is ``None`` or contains no non-zero value.
    """
    has_content = img is not None and img.any()
    if not has_content:
        raise gr.Error("Please upload an image and select the processing steps!")

    saved_path = process_image(img, steps)
    return ocr_with_easy(saved_path)
 
 
 
 
 
 
 
 
 
 
36
 
37
# Interactive tutorial steps: the names offered as choices in the steps
# checkbox group.
tutorial_steps = ["Grayscale Conversion", "Thresholding"]

# Interactive questions: each record carries the prompt, the selectable
# options and the text of the correct option.
questions = [
    {"question": prompt, "options": choices, "answer": correct}
    for prompt, choices, correct in (
        (
            "What is the first step in OCR?",
            ["Binarization", "Grayscale Conversion", "Edge Detection"],
            "Grayscale Conversion",
        ),
        (
            "What is the purpose of thresholding in OCR?",
            ["To detect edges", "To convert image to grayscale", "To binarize the image"],
            "To binarize the image",
        ),
        (
            "Which library is used for OCR in this app?",
            ["Tesseract", "EasyOCR", "OpenCV"],
            "EasyOCR",
        ),
        (
            "What format is the image saved in after preprocessing?",
            ["JPG", "PNG", "TIFF"],
            "PNG",
        ),
        (
            "What does OCR stand for?",
            ["Optical Character Recognition", "Optical Character Reading", "Optical Code Recognition"],
            "Optical Character Recognition",
        ),
    )
]
71
 
@@ -98,38 +118,36 @@ def quiz_interface():
98
 
99
  # Explanation text
100
  explanation_text = """
101
- **Welcome to the OCR Tutorial!**
102
-
103
- Optical Character Recognition (OCR) is a technology used to convert different types of documents, such as scanned paper documents, PDF files, or images captured by a digital camera, into editable and searchable data.
104
-
105
- **Steps in the OCR Process:**
106
- 1. **Grayscale Conversion:** The first step in OCR is converting the image to grayscale. This simplifies the image and reduces the amount of data the OCR algorithm needs to process.
107
- 2. **Thresholding:** This step converts the grayscale image into a binary image, where the text is in black, and the background is in white. This makes it easier for the OCR algorithm to distinguish text from the background.
108
- 3. **OCR using EasyOCR:** We use the EasyOCR library to recognize and extract text from the preprocessed image.
109
-
110
  **Interactive Tutorial:**
111
- Please upload an image and select the correct order of steps to perform OCR.
112
  """
113
 
114
# Components of the OCR tool tab, created in the same order as before.
image = gr.Image()
steps = gr.CheckboxGroup(
    choices=tutorial_steps,
    label="Select and order the steps for OCR",
)
output = gr.Textbox(label="OCR Output")
explanation = gr.Markdown(explanation_text)

# The OCR tool itself: image + selected steps in, recognised text out.
ocr_app = gr.Interface(
    fn=generate_ocr,
    inputs=[image, steps],
    outputs=output,
    title="Optical Character Recognition",
    description=explanation_text,
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
)

# One tab for the tool followed by one tab per quiz question.
quiz_app = gr.TabbedInterface(
    [ocr_app, *quiz_interface()],
    ["OCR Tool", *(f"Question {number}" for number in range(1, len(questions) + 1))],
    title="OCR Tutorial and Quiz",
)

quiz_app.launch()
135
-
 
1
  import gradio as gr
2
  import cv2
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ from tensorflow.keras.datasets import mnist
6
+
7
+ # Functions for MNIST processing steps
8
def load_mnist():
    """Load MNIST through Keras and return only the test split.

    Returns:
        The ``(x_test, y_test)`` pair from ``mnist.load_data()``; the
        training pair is loaded as well but discarded.
    """
    _train_split, test_split = mnist.load_data()
    images, labels = test_split
    return images, labels
11
 
 
12
def get_grayscale(image):
    """Convert an image to single-channel grayscale.

    Args:
        image: NumPy image array. Multi-channel input is converted with
            ``cv2.COLOR_BGR2GRAY``, i.e. it is treated as BGR-ordered.

    Returns:
        A 2-D grayscale array. Input that is already single-channel is
        returned without conversion: a 2-D array as-is, an ``(H, W, 1)``
        array as a 2-D view of its only channel.
    """
    # The image input in this app is declared with image_mode='L', so frames
    # can arrive already single-channel; the BGR->GRAY conversion only
    # accepts colour input and would raise on them.
    if image.ndim == 2:
        return image
    if image.ndim == 3 and image.shape[2] == 1:
        return image[:, :, 0]
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
14
 
15
def thresholding(src):
    """Binarise an image with a fixed threshold of 127.

    Args:
        src: Image array passed to ``cv2.threshold``.

    Returns:
        The thresholded image produced with ``cv2.THRESH_BINARY`` and a
        maximum value of 255.
    """
    # cv2.threshold returns (threshold value used, output image); only the
    # image is needed.
    _, binary = cv2.threshold(src, 127, 255, cv2.THRESH_BINARY)
    return binary
17
+
18
def gaussian_blur(image):
    """Blur an image with a 5x5 Gaussian kernel.

    Args:
        image: Image array passed to ``cv2.GaussianBlur``.

    Returns:
        The blurred image.
    """
    kernel_size = (5, 5)
    # The third argument (sigmaX) is passed as 0, exactly as before.
    blurred = cv2.GaussianBlur(image, kernel_size, 0)
    return blurred
20
 
21
def edge_detection(image):
    """Run Canny edge detection with thresholds 100 and 200.

    Args:
        image: Image array passed to ``cv2.Canny``.

    Returns:
        The edge map returned by ``cv2.Canny``.
    """
    lower, upper = 100, 200
    edges = cv2.Canny(image, lower, upper)
    return edges
23
+
24
def process_mnist_image(img, steps):
    """Apply the selected steps in order, keeping the image after each one.

    Args:
        img: Image array; a copy of it is stored under ``'Original'``.
        steps: Iterable of step names. "Grayscale Conversion",
            "Thresholding", "Gaussian Blur" and "Edge Detection" are
            recognised; any other name leaves the image unchanged but is
            still recorded.

    Returns:
        A dict mapping ``'Original'`` and then each step name to the image
        as it stood after that step, in application order.
    """
    def _apply(name, frame):
        # Resolve one step name to its transformation.
        if name == "Grayscale Conversion":
            return get_grayscale(frame)
        if name == "Thresholding":
            return thresholding(frame)
        if name == "Gaussian Blur":
            return gaussian_blur(frame)
        if name == "Edge Detection":
            return edge_detection(frame)
        return frame

    snapshots = {'Original': img.copy()}
    current = img
    for name in steps:
        current = _apply(name, current)
        snapshots[name] = current
    return snapshots
41
+
42
def visualize_steps(img, steps):
    """Render the image after every processing step side by side.

    Args:
        img: Image array forwarded to ``process_mnist_image``.
        steps: Step names forwarded to ``process_mnist_image``.

    Returns:
        The path ``'mnist_processing_steps.png'`` the figure was saved to.
    """
    output_path = 'mnist_processing_steps.png'
    step_images = process_mnist_image(img, steps)

    # squeeze=False always yields a 2-D array of Axes. With the default, a
    # single panel (no steps selected) comes back as a bare Axes object,
    # which cannot be iterated.
    fig, axes = plt.subplots(
        1, len(step_images), figsize=(15, 5), squeeze=False
    )
    try:
        for ax, (title, frame) in zip(axes[0], step_images.items()):
            ax.imshow(frame, cmap='gray')
            ax.set_title(title)
            ax.axis('off')

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Close the figure so repeated requests do not accumulate open
        # figures in pyplot's registry.
        plt.close(fig)
    return output_path
54
 
55
# Interactive tutorial steps: the names offered as choices in the steps
# checkbox group.
tutorial_steps = [
    "Grayscale Conversion", "Thresholding", "Gaussian Blur", "Edge Detection",
]

# Interactive questions: each record carries the prompt, the selectable
# options and the text of the correct option.
questions = [
    {"question": prompt, "options": choices, "answer": correct}
    for prompt, choices, correct in (
        (
            "What is the first step in preprocessing MNIST images?",
            ["Gaussian Blur", "Grayscale Conversion", "Edge Detection"],
            "Grayscale Conversion",
        ),
        (
            "What does thresholding do in image preprocessing?",
            ["Detects edges", "Blurs the image", "Binarizes the image"],
            "Binarizes the image",
        ),
        (
            "What library is used to load the MNIST dataset?",
            ["OpenCV", "TensorFlow", "Pytorch"],
            "TensorFlow",
        ),
        (
            "What color space conversion is used in Grayscale Conversion?",
            ["BGR to RGB", "RGB to GRAY", "BGR to GRAY"],
            "BGR to GRAY",
        ),
        (
            "What is the purpose of Gaussian Blur?",
            ["Detect edges", "Reduce noise", "Convert to grayscale"],
            "Reduce noise",
        ),
    )
]
91
 
 
118
 
119
  # Explanation text
120
  explanation_text = """
121
+ **Welcome to the MNIST Processing Tutorial!**
122
+ This tutorial will guide you through the basic steps of preprocessing images from the MNIST dataset.
123
+ **Steps in the MNIST Image Processing:**
124
+ 1. **Grayscale Conversion:** Converts the image to grayscale to simplify the data.
125
+ 2. **Thresholding:** Converts the grayscale image into a binary image to distinguish the digits more clearly.
126
+ 3. **Gaussian Blur:** Applies a blur to reduce noise and detail in the image.
127
+ 4. **Edge Detection:** Detects the edges of the digits to enhance the features for further processing or recognition tasks.
 
 
128
  **Interactive Tutorial:**
129
+ Please upload an MNIST image and select the preprocessing steps to visualize their effects.
130
  """
131
 
132
# The MNIST test split is loaded at import time; nothing in this section
# reads x_test / y_test afterwards.
(x_test, y_test) = load_mnist()

# NOTE(review): `shape=` is presumably only accepted by older Gradio
# releases - confirm against the Gradio version this app is pinned to.
image = gr.Image(shape=(28, 28), image_mode='L')
steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for MNIST processing")
# visualize_steps returns the path of the saved figure, so the output is
# declared as 'filepath'; 'file' is not one of the documented Image types.
output = gr.Image(type='filepath', label="Processing Steps Visualization")
explanation = gr.Markdown(explanation_text)

# The processing tool: image + selected steps in, step-by-step figure out.
mnist_app = gr.Interface(
    fn=visualize_steps,
    inputs=[image, steps],
    outputs=output,
    title="MNIST Image Processing",
    description=explanation_text,
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}"
)

# One tab for the tool followed by one tab per quiz question.
quiz_app = gr.TabbedInterface(
    [mnist_app] + quiz_interface(),
    ["MNIST Tool"] + [f"Question {i+1}" for i in range(len(questions))],
    title="MNIST Tutorial and Quiz"
)

quiz_app.launch()