Spaces:

llmat
/

OCR_Tutorial

Sleeping

App Files Files Community

llmat committed on Jul 3, 2024

Commit

faf3b9f

verified ·

1 Parent(s): 6b27525

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -56

app.py CHANGED Viewed

@@ -1,71 +1,91 @@
 import gradio as gr
 import cv2
-import easyocr
-from PIL import Image
-# Functions for OCR steps
 def get_grayscale(image):
     """Collapse a colour image to a single intensity channel.

     The channels are interpreted in OpenCV's BGR order.
     """
     conversion_code = cv2.COLOR_BGR2GRAY
     return cv2.cvtColor(image, conversion_code)
 def thresholding(src):
     """Binarize a grayscale image with a fixed global threshold.

     Pixels brighter than 127 become 255 and all others become 0.
     ``cv2.THRESH_BINARY`` is used because the tutorial text and quiz in
     this file describe the step as producing a binary image, whereas
     ``cv2.THRESH_TOZERO`` only zeroes the dark pixels and keeps the
     remaining grey levels.

     Args:
         src: Single-channel image array.

     Returns:
         The thresholded image (second element of ``cv2.threshold``'s result).
     """
     _retval, binary = cv2.threshold(src, 127, 255, cv2.THRESH_BINARY)
     return binary
def ocr_with_easy(img):
    """Run EasyOCR on an image and return the recognised text as one string.

    Args:
        img: Anything ``easyocr.Reader.readtext`` accepts; the caller in this
            file passes the path of the preprocessed PNG.

    Returns:
        The detected text fragments joined by single spaces.
    """
    # Constructing a Reader is the expensive part of a request, so build it
    # once and reuse it for every later call.
    reader = getattr(ocr_with_easy, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en'])
        ocr_with_easy._reader = reader
    # ``paragraph`` must be a real boolean: the string "False" is truthy and
    # silently switched paragraph merging on.
    fragments = reader.readtext(img, paragraph=False, detail=0)
    # Separate fragments so neighbouring words do not run together.
    return ' '.join(fragments)
def process_image(img, steps):
    """Apply the selected preprocessing steps and save the result as a PNG.

    Args:
        img: Image array to preprocess.
        steps: Iterable of step names; "Grayscale Conversion" and
            "Thresholding" are applied in the given order, any other name
            is ignored.

    Returns:
        The path of the written file, 'processed_image.png'.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    output_path = 'processed_image.png'
    for step in steps:
        if step == "Grayscale Conversion":
            img = get_grayscale(img)
        elif step == "Thresholding":
            img = thresholding(img)
    # cv2.imwrite reports failure through its return value; without this
    # check a stale or missing file could be handed on to the OCR stage.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Could not write {output_path}")
    return output_path
def generate_ocr(img, steps):
    """Gradio callback: preprocess the uploaded image and return its OCR text.

    Raises ``gr.Error`` when no image was supplied or when the image has no
    non-zero pixel.
    """
    has_content = img is not None and img.any()
    if not has_content:
        raise gr.Error("Please upload an image and select the processing steps!")
    saved_path = process_image(img, steps)
    return ocr_with_easy(saved_path)
 # Interactive tutorial steps
 tutorial_steps = ["Grayscale Conversion", "Thresholding"]
 # Interactive questions, kept as (question, options, answer) rows and
 # expanded into the dict shape the quiz code consumes.
 _QUIZ_ROWS = (
     (
         "What is the first step in OCR?",
         ["Binarization", "Grayscale Conversion", "Edge Detection"],
         "Grayscale Conversion",
     ),
     (
         "What is the purpose of thresholding in OCR?",
         ["To detect edges", "To convert image to grayscale", "To binarize the image"],
         "To binarize the image",
     ),
     (
         "Which library is used for OCR in this app?",
         ["Tesseract", "EasyOCR", "OpenCV"],
         "EasyOCR",
     ),
     (
         "What format is the image saved in after preprocessing?",
         ["JPG", "PNG", "TIFF"],
         "PNG",
     ),
     (
         "What does OCR stand for?",
         ["Optical Character Recognition", "Optical Character Reading", "Optical Code Recognition"],
         "Optical Character Recognition",
     ),
 )
 questions = [
     {"question": prompt, "options": choices, "answer": correct}
     for prompt, choices, correct in _QUIZ_ROWS
 ]
@@ -98,38 +118,36 @@ def quiz_interface():
 # Explanation text (Markdown shown as the description of the OCR tab)
 explanation_text = """
 **Welcome to the OCR Tutorial!**
 Optical Character Recognition (OCR) is a technology used to convert different types of documents, such as scanned paper documents, PDF files, or images captured by a digital camera, into editable and searchable data.
 **Steps in the OCR Process:**
 1. **Grayscale Conversion:** The first step in OCR is converting the image to grayscale. This simplifies the image and reduces the amount of data the OCR algorithm needs to process.
 2. **Thresholding:** This step converts the grayscale image into a binary image, where the text is in black, and the background is in white. This makes it easier for the OCR algorithm to distinguish text from the background.
 3. **OCR using EasyOCR:** We use the EasyOCR library to recognize and extract text from the preprocessed image.
 **Interactive Tutorial:**
 Please upload an image and select the correct order of steps to perform OCR.
 """
 # Components are created in the same order as before: inputs, output, text.
 image = gr.Image()
 steps = gr.CheckboxGroup(
     choices=tutorial_steps,
     label="Select and order the steps for OCR",
 )
 output = gr.Textbox(label="OCR Output")
 explanation = gr.Markdown(explanation_text)
 # First tab: the OCR tool itself.
 ocr_app = gr.Interface(
     fn=generate_ocr,
     inputs=[image, steps],
     outputs=output,
     title="Optical Character Recognition",
     description=explanation_text,
     css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
 )
 # Remaining tabs: one per quiz question, titled "Question 1", "Question 2", ...
 _quiz_tabs = quiz_interface()
 _tab_titles = ["OCR Tool"]
 _tab_titles.extend(f"Question {number}" for number, _ in enumerate(questions, start=1))
 quiz_app = gr.TabbedInterface(
     [ocr_app, *_quiz_tabs],
     _tab_titles,
     title="OCR Tutorial and Quiz",
 )
 quiz_app.launch()

 import gradio as gr
 import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+from tensorflow.keras.datasets import mnist
+# Functions for MNIST processing steps
def load_mnist():
    """Return the MNIST test split as an ``(images, labels)`` pair.

    The training split that ``mnist.load_data()`` also returns is discarded.
    """
    _train_split, test_split = mnist.load_data()
    test_images, test_labels = test_split
    return test_images, test_labels
 def get_grayscale(image):
     """Return a single-channel version of ``image``.

     Args:
         image: Image array, either colour in OpenCV's BGR(A) channel order
             or already single-channel.

     Returns:
         A 2-D grayscale array.
     """
     # The upload widget in this app is configured with image_mode='L', so
     # frames can arrive already single-channel. cv2.cvtColor rejects those
     # for COLOR_BGR2GRAY, so pass them through instead of crashing.
     if image.ndim == 2:
         return image
     if image.ndim == 3 and image.shape[2] == 1:
         return image[:, :, 0]
     return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 def thresholding(src):
     """Apply a fixed binary threshold at 127 with 255 as the maximum value."""
     _retval, binary = cv2.threshold(src, 127, 255, cv2.THRESH_BINARY)
     return binary
def gaussian_blur(image):
    """Smooth ``image`` with a 5x5 Gaussian kernel."""
    kernel_size = (5, 5)
    sigma_x = 0  # 0 asks OpenCV to derive sigma from the kernel size
    return cv2.GaussianBlur(image, kernel_size, sigma_x)
def edge_detection(image):
    """Run the Canny edge detector with hysteresis thresholds 100 and 200."""
    lower, upper = 100, 200
    return cv2.Canny(image, lower, upper)
def process_mnist_image(img, steps):
    """Run the chosen preprocessing steps and keep every intermediate image.

    Returns a dict whose first entry, 'Original', is a copy of the input and
    whose remaining entries map each step name, in order, to the image as it
    looks after that step. Names that match no known step are still recorded,
    with the image left unchanged.
    """
    # Lambdas defer the helper lookups until a step is actually applied.
    transforms = {
        "Grayscale Conversion": lambda frame: get_grayscale(frame),
        "Thresholding": lambda frame: thresholding(frame),
        "Gaussian Blur": lambda frame: gaussian_blur(frame),
        "Edge Detection": lambda frame: edge_detection(frame),
    }
    snapshots = {'Original': img.copy()}
    current = img
    for name in steps:
        transform = transforms.get(name)
        if transform is not None:
            current = transform(current)
        snapshots[name] = current
    return snapshots
def visualize_steps(img, steps):
    """Render the original image and each processing stage side by side.

    Args:
        img: Uploaded image array, or None when nothing was uploaded.
        steps: Names of the preprocessing steps to apply, in order; None is
            treated as an empty selection.

    Returns:
        Path of the saved figure, 'mnist_processing_steps.png'.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        raise gr.Error("Please upload an image first!")
    step_images = process_mnist_image(img, steps or [])
    # squeeze=False keeps ``axes`` a 2-D array even for a single panel; the
    # default returns a bare Axes object there, which cannot be zipped over.
    fig, axes = plt.subplots(1, len(step_images), figsize=(15, 5), squeeze=False)
    try:
        for ax, (title, stage) in zip(axes[0], step_images.items()):
            ax.imshow(stage, cmap='gray')
            ax.set_title(title)
            ax.axis('off')
        fig.tight_layout()
        fig.savefig('mnist_processing_steps.png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated requests do not accumulate open figures.
        plt.close(fig)
    return 'mnist_processing_steps.png'
 # Interactive tutorial steps
 tutorial_steps = ["Grayscale Conversion", "Thresholding", "Gaussian Blur", "Edge Detection"]
 # Interactive questions, kept as (question, options, answer) rows and
 # expanded into the dict shape the quiz code consumes.
 _QUIZ_ROWS = (
     (
         "What is the first step in preprocessing MNIST images?",
         ["Gaussian Blur", "Grayscale Conversion", "Edge Detection"],
         "Grayscale Conversion",
     ),
     (
         "What does thresholding do in image preprocessing?",
         ["Detects edges", "Blurs the image", "Binarizes the image"],
         "Binarizes the image",
     ),
     (
         "What library is used to load the MNIST dataset?",
         ["OpenCV", "TensorFlow", "Pytorch"],
         "TensorFlow",
     ),
     (
         "What color space conversion is used in Grayscale Conversion?",
         ["BGR to RGB", "RGB to GRAY", "BGR to GRAY"],
         "BGR to GRAY",
     ),
     (
         "What is the purpose of Gaussian Blur?",
         ["Detect edges", "Reduce noise", "Convert to grayscale"],
         "Reduce noise",
     ),
 )
 questions = [
     {"question": prompt, "options": choices, "answer": correct}
     for prompt, choices, correct in _QUIZ_ROWS
 ]
 # Explanation text (Markdown shown as the description of the MNIST tab)
 explanation_text = """
 **Welcome to the MNIST Processing Tutorial!**
 This tutorial will guide you through the basic steps of preprocessing images from the MNIST dataset.
 **Steps in the MNIST Image Processing:**
 1. **Grayscale Conversion:** Converts the image to grayscale to simplify the data.
 2. **Thresholding:** Converts the grayscale image into a binary image to distinguish the digits more clearly.
 3. **Gaussian Blur:** Applies a blur to reduce noise and detail in the image.
 4. **Edge Detection:** Detects the edges of the digits to enhance the features for further processing or recognition tasks.
 **Interactive Tutorial:**
 Please upload an MNIST image and select the preprocessing steps to visualize their effects.
 """
 # NOTE(review): the loaded test split is not used by any code visible here;
 # confirm whether this start-up download is still needed.
 (x_test, y_test) = load_mnist()
 # NOTE(review): `shape` appears to be accepted only by older Gradio
 # releases -- confirm against the version this Space pins.
 image = gr.Image(shape=(28, 28), image_mode='L')
 steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for MNIST processing")
 # visualize_steps returns a path string; 'filepath' is the supported
 # spelling of the deprecated 'file' type.
 output = gr.Image(type='filepath', label="Processing Steps Visualization")
 explanation = gr.Markdown(explanation_text)
 # First tab: the interactive preprocessing tool.
 mnist_app = gr.Interface(
     fn=visualize_steps,
     inputs=[image, steps],
     outputs=output,
     title="MNIST Image Processing",
     description=explanation_text,
     css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}"
 )
 # Remaining tabs: one per quiz question, titled "Question 1", "Question 2", ...
 quiz_app = gr.TabbedInterface(
     [mnist_app] + quiz_interface(),
     ["MNIST Tool"] + [f"Question {number}" for number, _ in enumerate(questions, start=1)],
     title="MNIST Tutorial and Quiz"
 )
 quiz_app.launch()