Spaces:
Sleeping
Sleeping
| import os | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| from path import Path | |
| import streamlit as st | |
| from typing import Tuple | |
| import easyocr # Import EasyOCR | |
| from pathlib import Path | |
| import sys | |
| # Add the 'app' directory to sys.path | |
| # 'app' is resolved relative to this file's location, not the current working directory | |
| sys.path.append(str(Path(__file__).parent / 'app')) | |
| from app.dataloader_iam import Batch | |
| from app.model import Model, DecoderType | |
| from app.preprocessor import Preprocessor | |
| from streamlit_drawable_canvas import st_canvas | |
# Configure the page up front, before any other Streamlit command is issued.
st.set_page_config(
    page_title="HTR App",
    page_icon=":pencil:",
    layout="centered",
    initial_sidebar_state="auto",
)

# Theme bookkeeping is kept in the session state and initialised only when
# the "themes" entry is not there yet.
ms = st.session_state
if "themes" not in ms:
    ms.themes = {
        "current_theme": "light",
        "refreshed": True,
        # Options ChangeTheme applies while the current theme is "light",
        # i.e. when switching away from it -- hence the dark palette here.
        "light": {
            "theme.base": "dark",
            "theme.backgroundColor": "black",
            "theme.primaryColor": "#c98bdb",
            "theme.secondaryBackgroundColor": "#5591f5",
            "theme.textColor": "white",
            # NOTE(review): "π" looks like a mis-encoded emoji -- confirm.
            "button_face": "π",
        },
        # Options ChangeTheme applies while the current theme is "dark",
        # i.e. when switching back to the light palette.
        "dark": {
            "theme.base": "light",
            "theme.backgroundColor": "white",
            "theme.primaryColor": "#5591f5",
            "theme.secondaryBackgroundColor": "#82E1D7",
            "theme.textColor": "#0a1464",
            "button_face": "π",
        },
    }
def ChangeTheme():
    """
    Button callback that toggles between the two themes stored in ms.themes.
    Pushes every "theme*" option of the selected palette into Streamlit's
    config, marks the state as not refreshed and flips "current_theme".
    """
    themes = ms.themes
    active = themes["current_theme"]

    # The palette stored under the active name holds the options to switch to
    if active == "light":
        palette = themes["light"]
    else:
        palette = themes["dark"]

    for option, value in palette.items():
        # Entries such as "button_face" are not Streamlit config options
        if not option.startswith("theme"):
            continue
        st._config.set_option(option, value)

    # Tells the top-level script that a rerun is pending
    themes["refreshed"] = False

    if active == "dark":
        themes["current_theme"] = "light"
    elif active == "light":
        themes["current_theme"] = "dark"
# Theme toggle button: its label comes from the entry of the current theme,
# and ChangeTheme runs as the click callback.
if ms.themes["current_theme"] == "light":
    btn_face = ms.themes["light"]["button_face"]
else:
    btn_face = ms.themes["dark"]["button_face"]
st.button(btn_face, on_click=ChangeTheme)

# ChangeTheme clears the "refreshed" flag; restore it and rerun the script
# once (presumably so the options it just set are picked up -- confirm).
if not ms.themes["refreshed"]:
    ms.themes["refreshed"] = True
    st.rerun()
def get_img_size(line_mode: bool = False) -> Tuple[int, int]:
    """
    Auxiliary method that returns the (width, height) pair for the model input.
    The width is 256 in line mode and 128 otherwise; the height always comes
    from get_img_height().
    """
    width = 256 if line_mode else 128
    return width, get_img_height()
def get_img_height() -> int:
    """
    Auxiliary method that returns the image height, a fixed value for the Neural Network.
    """
    fixed_height = 32
    return fixed_height
def infer(line_mode: bool, model: Model, fn_img: Path) -> list:
    """
    Auxiliary method that does inference using the pretrained models:
    Recognizes text in an image given its path.

    Args:
        line_mode: Forwarded to get_img_size() to pick the target image size.
        model: Model whose infer_batch() performs the recognition.
        fn_img: Path (str or path object) of the image file to read.

    Returns:
        A two-element list [recognized, probability] holding the two values
        returned by model.infer_batch().

    Raises:
        ValueError: If OpenCV could not read the image at fn_img.
    """
    # str() keeps this working for path objects as well as plain strings
    img = cv2.imread(str(fn_img), cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None; check explicitly instead
    # of with an assert, which is stripped when Python runs with -O
    if img is None:
        raise ValueError(f"Could not read image: {fn_img}")

    preprocessor = Preprocessor(get_img_size(line_mode), dynamic_width=True, padding=16)
    img = preprocessor.process_img(img)

    # Single-image batch
    batch = Batch([img], None, 1)
    recognized, probability = model.infer_batch(batch, True)
    return [recognized, probability]
def infer_super_model(image_path) -> Tuple[list, list]:
    """
    Auxiliary method that runs EasyOCR (English) on an image.

    Args:
        image_path: Image input handed unchanged to EasyOCR's readtext().

    Returns:
        A tuple (recognized_texts, probabilities): two lists of equal length
        built from the second and third field of every readtext() result.
    """
    reader = easyocr.Reader(['en'])  # Initialize EasyOCR reader
    detections = reader.readtext(image_path)
    # Each detection is indexed as (bounding box, text, confidence)
    recognized_texts = [detection[1] for detection in detections]
    probabilities = [detection[2] for detection in detections]
    return recognized_texts, probabilities
def main():
    """
    Renders the demo page and runs text recognition when requested.

    The page offers a model selector, a decoder selector (hidden for
    Super_Model), a stroke-width slider, a drawing canvas and a PNG uploader.
    After "Recognize Text" is pressed, the uploaded image is used if there is
    one, otherwise the canvas drawing:

    - Super_Model runs EasyOCR on the in-memory image and lists every
      recognized text with its confidence.
    - Single_Model / Line_Model save the input as 'userInput.png', restore
      the selected Model and display the best candidate and its probability.
    """
    st.title('Extract text from Image Demo')

    st.markdown("""
Streamlit Web Interface for Handwritten Text Recognition (HTR), Optical Character Recognition (OCR)
implemented with TensorFlow and trained on the IAM off-line HTR dataset.
The model takes images of single words or text lines (multiple words) as input and outputs the recognized text.
""", unsafe_allow_html=True)

    st.markdown("""
Predictions can be made using one of two models:
- Single_Model (Trained on Single Word Images)
- Line_Model (Trained on Text Line Images)
- Super_Model ( Most Robust Option for English )
- Burmese (Link)
""", unsafe_allow_html=True)

    st.subheader('Select a Model, Choose the Arguments and Draw in the box below or Upload an Image to obtain a prediction.')

    # Selectors for the model and decoder
    modelSelect = st.selectbox("Select a Model", ['Single_Model', 'Line_Model', 'Super_Model'])
    # The decoder is only used by the non-EasyOCR models further down
    if modelSelect != 'Super_Model':
        decoderSelect = st.selectbox("Select a Decoder", ['Bestpath', 'Beamsearch', 'Wordbeamsearch'])

    # Mappings (dictionaries) for the model and decoder. Assigns the directory
    # or the DecoderType of the selected option.
    modelMapping = {
        "Single_Model": 'model/word-model',
        "Line_Model": 'model/line-model'
    }
    decoderMapping = {
        'Bestpath': DecoderType.BestPath,
        'Beamsearch': DecoderType.BeamSearch,
        'Wordbeamsearch': DecoderType.WordBeamSearch
    }

    # Slider for pencil width
    strokeWidth = st.slider("Stroke Width: ", 1, 25, 6)

    # Canvas/Text Box for user input. BackGround Color must be white (#FFFFFF)
    # or else text will not be properly recognised.
    inputDrawn = st_canvas(
        fill_color="rgba(255, 165, 0, 0.3)",
        stroke_width=strokeWidth,
        update_streamlit=True,
        background_image=None,
        height=200,
        width=400,
        drawing_mode='freedraw',
        key="canvas",
        background_color='#FFFFFF'
    )

    # Buffer for user input (images uploaded from the user's device)
    inputBuffer = st.file_uploader("Upload an Image", type=["png"])

    # Inference Button
    inferBool = st.button("Recognize Text")
    if not inferBool:
        return

    if modelSelect == 'Super_Model':
        inputArray = None
        if inputBuffer is not None:
            # Uploaded file takes precedence over the canvas
            with Image.open(inputBuffer).convert('RGB') as img:
                inputArray = np.array(img)
        elif inputDrawn.image_data is not None:
            # Canvas data is RGBA; convert it to RGB
            inputArray = cv2.cvtColor(np.array(inputDrawn.image_data, dtype=np.uint8), cv2.COLOR_RGBA2RGB)

        if inputArray is None:
            st.write("No image data found. Please upload an image or draw on the canvas.")
            return

        # Assuming English language; adjust as necessary
        reader = easyocr.Reader(['en'])
        results = reader.readtext(inputArray)

        # One "text (confidence: x.xx)" line per detection
        all_text = ''.join(
            f'{text} (confidence: {prob:.2f})\n' for (_bbox, text, prob) in results
        )
        st.write("**Recognized Texts and their Confidence Scores:**")
        st.text(all_text)
        return

    # Single_Model / Line_Model: build an RGBA image from the input. An
    # uploaded file takes precedence over the canvas and is converted to RGBA
    # explicitly, because a PNG may be stored in another mode (RGB, L, P, ...).
    if inputBuffer is not None:
        inputImage = Image.open(inputBuffer).convert('RGBA')
    elif inputDrawn.image_data is not None:
        inputImage = Image.fromarray(np.array(inputDrawn.image_data).astype('uint8'), 'RGBA')
    else:
        st.write("No image data found. Please upload an image or draw on the canvas.")
        return

    # infer() reads its input from disk, so the image is saved as a .png first
    inputImage.save('userInput.png')

    # We obtain the model directory and the decoder type from their mapping
    modelDir = modelMapping[modelSelect]
    decoderType = decoderMapping[decoderSelect]

    # The character list is read as one string and split into single characters
    with open(modelDir + "/charList.txt") as charListFile:
        charList = list(charListFile.read())

    # Finally, we call the model with this image and display the Best
    # Candidate and its probability on the Interface. Line mode is derived
    # from the selected model name rather than from the directory string.
    model = Model(charList, modelDir, decoderType, must_restore=True)
    inferedText = infer(modelSelect == 'Line_Model', model, 'userInput.png')
    st.write("**Best Candidate: **", inferedText[0][0])
    st.write("**Probability: **", str(inferedText[1][0]*100) + "%")


# Script entry point
if __name__ == "__main__":
    main()