Spaces:

llmat
/

OCR_Tutorial

Sleeping

App Files Files Community

llmat committed on Jul 3, 2024

Commit

ed9a3a1

verified ·

1 Parent(s): da1265a

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -74

app.py CHANGED Viewed

@@ -1,91 +1,71 @@
 import gradio as gr
 import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-from tensorflow.keras.datasets import mnist
-# Functions for MNIST processing steps
-def load_mnist():
-    (x_train, y_train), (x_test, y_test) = mnist.load_data()
-    return x_test, y_test
 def get_grayscale(image):
     return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 def thresholding(src):
-    return cv2.threshold(src, 127, 255, cv2.THRESH_BINARY)[1]
-def gaussian_blur(image):
-    return cv2.GaussianBlur(image, (5, 5), 0)
-def edge_detection(image):
-    return cv2.Canny(image, 100, 200)
-def process_mnist_image(img, steps):
-    original_img = img.copy()
-    step_images = {'Original': original_img}
     for step in steps:
         if step == "Grayscale Conversion":
             img = get_grayscale(img)
         elif step == "Thresholding":
             img = thresholding(img)
-        elif step == "Gaussian Blur":
-            img = gaussian_blur(img)
-        elif step == "Edge Detection":
-            img = edge_detection(img)
-        step_images[step] = img
-    return step_images
-def visualize_steps(img, steps):
-    step_images = process_mnist_image(img, steps)
-    fig, axes = plt.subplots(1, len(step_images), figsize=(15, 5))
-    for ax, (step, img) in zip(axes, step_images.items()):
-        ax.imshow(img, cmap='gray')
-        ax.set_title(step)
-        ax.axis('off')
-    plt.tight_layout()
-    plt.savefig('mnist_processing_steps.png')
-    return 'mnist_processing_steps.png'
 # Interactive tutorial steps
 tutorial_steps = [
     "Grayscale Conversion",
-    "Thresholding",
-    "Gaussian Blur",
-    "Edge Detection"
 ]
 # Interactive questions
 questions = [
     {
-        "question": "What is the first step in preprocessing MNIST images?",
-        "options": ["Gaussian Blur", "Grayscale Conversion", "Edge Detection"],
         "answer": "Grayscale Conversion"
     },
     {
-        "question": "What does thresholding do in image preprocessing?",
-        "options": ["Detects edges", "Blurs the image", "Binarizes the image"],
-        "answer": "Binarizes the image"
     },
     {
-        "question": "What library is used to load the MNIST dataset?",
-        "options": ["OpenCV", "TensorFlow", "Pytorch"],
-        "answer": "TensorFlow"
     },
     {
-        "question": "What color space conversion is used in Grayscale Conversion?",
-        "options": ["BGR to RGB", "RGB to GRAY", "BGR to GRAY"],
-        "answer": "BGR to GRAY"
     },
     {
-        "question": "What is the purpose of Gaussian Blur?",
-        "options": ["Detect edges", "Reduce noise", "Convert to grayscale"],
-        "answer": "Reduce noise"
     }
 ]
@@ -118,36 +98,34 @@ def quiz_interface():
 # Explanation text
 explanation_text = """
-**Welcome to the MNIST Processing Tutorial!**
-This tutorial will guide you through the basic steps of preprocessing images from the MNIST dataset.
-**Steps in the MNIST Image Processing:**
-1. **Grayscale Conversion:** Converts the image to grayscale to simplify the data.
-2. **Thresholding:** Converts the grayscale image into a binary image to distinguish the digits more clearly.
-3. **Gaussian Blur:** Applies a blur to reduce noise and detail in the image.
-4. **Edge Detection:** Detects the edges of the digits to enhance the features for further processing or recognition tasks.
 **Interactive Tutorial:**
-Please upload an MNIST image and select the preprocessing steps to visualize their effects.
 """
-(x_test, y_test) = load_mnist()
-image = gr.Image(shape=(28, 28), image_mode='L')
-steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for MNIST processing")
-output = gr.Image(type='file', label="Processing Steps Visualization")
 explanation = gr.Markdown(explanation_text)
-mnist_app = gr.Interface(
-    fn=visualize_steps,
     inputs=[image, steps],
     outputs=output,
-    title="MNIST Image Processing",
     description=explanation_text,
     css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}"
 )
 quiz_app = gr.TabbedInterface(
-    [mnist_app] + quiz_interface(),
-    ["MNIST Tool"] + [f"Question {i+1}" for i in range(len(questions))],
-    title="MNIST Tutorial and Quiz"
 )
 quiz_app.launch()

 import gradio as gr
 import cv2
+import easyocr
+from PIL import Image
+# Functions for OCR steps
 def get_grayscale(image):
     """Return a single-channel grayscale version of ``image``.
     Args:
         image: Image as a numpy array. Colour input is treated as RGB,
             the channel order Gradio's ``Image`` component delivers; an
             already single-channel (2-D) array is returned unchanged.
     Returns:
         The grayscale image as a 2-D numpy array.
     """
     # cvtColor rejects single-channel input for a colour->gray conversion,
     # so pass such images through instead of failing.
     if image.ndim == 2:
         return image
     # The array comes from gr.Image (RGB order), not cv2.imread (BGR), so
     # RGB2GRAY applies the luminance weights to the correct channels.
     return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
 def thresholding(src):
+    # Applies a fixed threshold of 127 (max value 255) to ``src`` and returns
+    # the thresholded image; cv2.threshold returns (retval, image), hence [1].
+    # NOTE(review): per the OpenCV docs, THRESH_TOZERO keeps pixels above the
+    # threshold at their original value and sets the rest to 0, so the result
+    # is presumably not the strict black/white image the tutorial text
+    # describes. Confirm whether THRESH_BINARY was intended.
+    return cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)[1]
+def ocr_with_easy(img):
+    reader = easyocr.Reader(['en'])
+    bounds = reader.readtext(img, paragraph="False", detail=0)
+    bounds = ''.join(bounds)
+    return bounds
+def process_image(img, steps):
     for step in steps:
         if step == "Grayscale Conversion":
             img = get_grayscale(img)
         elif step == "Thresholding":
             img = thresholding(img)
+    cv2.imwrite('processed_image.png', img)
+    return 'processed_image.png'
+def generate_ocr(img, steps):
+    text_output = ''
+    if img is not None and (img).any():
+        processed_image_path = process_image(img, steps)
+        text_output = ocr_with_easy(processed_image_path)
+    else:
+        raise gr.Error("Please upload an image and select the processing steps!")
+    return text_output
 # Interactive tutorial steps
+# These labels are offered as the CheckboxGroup choices and are the exact
+# strings process_image compares against, so the two must stay in sync.
 tutorial_steps = [
     "Grayscale Conversion",
+    "Thresholding"
 ]
 # Interactive questions
+# Each entry is a dict with the question text, the list of answer options,
+# and the correct answer (always one of the listed options). The number of
+# entries also determines how many "Question N" tab titles are generated.
+# NOTE(review): presumably consumed by quiz_interface(), which is defined
+# outside the visible part of the diff - confirm.
 questions = [
     {
+        "question": "What is the first step in OCR?",
+        "options": ["Binarization", "Grayscale Conversion", "Edge Detection"],
         "answer": "Grayscale Conversion"
     },
     {
+        "question": "What is the purpose of thresholding in OCR?",
+        "options": ["To detect edges", "To convert image to grayscale", "To binarize the image"],
+        "answer": "To binarize the image"
     },
     {
+        "question": "Which library is used for OCR in this app?",
+        "options": ["Tesseract", "EasyOCR", "OpenCV"],
+        "answer": "EasyOCR"
     },
     {
+        "question": "What format is the image saved in after preprocessing?",
+        "options": ["JPG", "PNG", "TIFF"],
+        "answer": "PNG"
     },
     {
+        "question": "What does OCR stand for?",
+        "options": ["Optical Character Recognition", "Optical Character Reading", "Optical Code Recognition"],
+        "answer": "Optical Character Recognition"
     }
 ]
 # Explanation text
+# Markdown-formatted introduction to the OCR tutorial. It is passed as the
+# `description` of the OCR interface and also wrapped in a gr.Markdown
+# component further down.
 explanation_text = """
+**Welcome to the OCR Tutorial!**
+Optical Character Recognition (OCR) is a technology used to convert different types of documents, such as scanned paper documents, PDF files, or images captured by a digital camera, into editable and searchable data.
+**Steps in the OCR Process:**
+1. **Grayscale Conversion:** The first step in OCR is converting the image to grayscale. This simplifies the image and reduces the amount of data the OCR algorithm needs to process.
+2. **Thresholding:** This step converts the grayscale image into a binary image, where the text is in black, and the background is in white. This makes it easier for the OCR algorithm to distinguish text from the background.
+3. **OCR using EasyOCR:** We use the EasyOCR library to recognize and extract text from the preprocessed image.
 **Interactive Tutorial:**
+Please upload an image and select the correct order of steps to perform OCR.
 """
+# Components of the OCR tab: the uploaded image, the selected preprocessing
+# steps, and the text box that shows the recognised text.
+image = gr.Image()
+steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for OCR")
+output = gr.Textbox(label="OCR Output")
+# NOTE(review): `explanation` is not referenced in the visible part of the
+# file; the interface below passes explanation_text directly.
 explanation = gr.Markdown(explanation_text)
+# OCR tab: generate_ocr receives (image, steps) and its return value fills
+# the output text box.
+ocr_app = gr.Interface(
+    fn=generate_ocr,
     inputs=[image, steps],
     outputs=output,
+    title="Optical Character Recognition",
     description=explanation_text,
     css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}"
 )
+# The first tab is the OCR tool, followed by whatever quiz_interface()
+# returns (defined outside the visible diff). One "Question N" title is
+# generated per entry in `questions`, so quiz_interface() presumably returns
+# one interface per question - confirm.
 quiz_app = gr.TabbedInterface(
+    [ocr_app] + quiz_interface(),
+    ["OCR Tool"] + [f"Question {i+1}" for i in range(len(questions))],
+    title="OCR Tutorial and Quiz"
 )
+# Module-level call: the app is launched as soon as this file is executed.
 quiz_app.launch()