|
import streamlit as st |
|
import numpy as np |
|
import pandas as pd |
|
import easyocr |
|
from PIL import Image |
|
from streamlit_drawable_canvas import st_canvas |
|
|
|
def rectangle(image, result):
    """Draw a blue outline around every detected text region on *image*.

    Args:
        image: PIL image to annotate. It is modified in place.
        result: Iterable of detections whose first element is a sequence of
            ``(x, y)`` corner points (``main`` passes the output of
            ``reader.readtext`` here).

    Returns:
        The same ``image`` object, with the outlines drawn on it.
    """
    from PIL import ImageDraw

    draw = ImageDraw.Draw(image)
    for detection in result:
        corners = detection[0]
        xs = [point[0] for point in corners]
        ys = [point[1] for point in corners]
        # Outline the axis-aligned box enclosing *all* corners. Relying on
        # corners 0 and 2 alone breaks for rotated regions: they are not
        # guaranteed to be ordered as (min, max), which ImageDraw.rectangle
        # requires (x1 >= x0 and y1 >= y0), and the box may miss part of
        # the text.
        draw.rectangle(
            ((min(xs), min(ys)), (max(xs), max(ys))),
            outline="blue",
            width=2,
        )
    return image
|
|
|
def _cache_resource(func):
    """Return *func* wrapped in Streamlit's cache for shared resources.

    Uses ``st.cache_resource`` when this Streamlit version provides it and
    falls back to the legacy ``st.cache(allow_output_mutation=True)``
    decorator otherwise.
    """
    if hasattr(st, "cache_resource"):
        return st.cache_resource(func)
    return st.cache(allow_output_mutation=True)(func)


@_cache_resource
def _load_reader():
    """Build the English/Japanese CPU-only EasyOCR reader once and reuse it."""
    return easyocr.Reader(['en', 'ja'], gpu=False)


def main():
    """Run the Streamlit OCR app.

    Lets the user upload an image or draw on a canvas, runs EasyOCR
    (English and Japanese, CPU only) on the chosen image, and shows the
    detected regions plus a table of recognized text with confidences.
    An uploaded file takes precedence over the canvas drawing.
    """
    st.set_page_config(
        page_title="OCR App",
        page_icon=":mag:",
        layout="centered",
        initial_sidebar_state="auto",
    )

    st.title("Optical Character Recognition (OCR) with EasyOCR")

    st.markdown("""
    Upload an image or use the canvas to draw, and the app will recognize and extract text from it.
    Supported languages for recognition include English and Japanese.
    """)

    col1, col2 = st.columns(2)
    with col1:
        file = st.file_uploader("Upload Image", type=['png', 'jpg', 'jpeg'])
    with col2:
        stroke_width = st.slider("Stroke Width: ", 1, 25, 3)
        canvas_result = st_canvas(
            fill_color="rgba(255, 165, 0, 0.3)",
            stroke_width=stroke_width,
            stroke_color="#ffffff",
            background_color="#000000",
            update_streamlit=True,
            width=400,
            height=400,
            drawing_mode="freedraw",
            key="canvas",
        )

    if file is not None:
        try:
            image = Image.open(file).convert('RGB')
        except OSError:
            # The uploader filters by extension only, so the content may
            # still be something PIL cannot decode; report it instead of
            # crashing with a traceback.
            st.error("The uploaded file could not be read as an image.")
            st.stop()
    elif canvas_result.image_data is not None:
        image = Image.fromarray(
            np.array(canvas_result.image_data).astype('uint8'), 'RGBA'
        ).convert('RGB')
    else:
        st.warning("Please upload an image or use the canvas to draw.")
        st.stop()

    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Streamlit re-executes this script on every widget interaction (each
    # canvas stroke included), so the reader comes from a cached loader
    # rather than being rebuilt on every run.
    reader = _load_reader()
    result = reader.readtext(np.array(image))

    if not result:
        st.info("No text detected.")
        return

    # rectangle() draws on the image in place and returns it.
    image = rectangle(image, result)
    st.image(image, caption='Processed Image with Detected Text', use_column_width=True)

    # Each detection is indexed as [0]=box, [1]=text, [2]=confidence.
    rows = [{'text': res[1], 'confidence': res[2]} for res in result]
    st.dataframe(pd.DataFrame(rows))
|
|
|
# Launch the app when this file is executed as a script rather than imported.
if __name__ == "__main__":
    main()
|
|