Update app.py
app.py CHANGED
@@ -3,23 +3,32 @@ import numpy as np
 from tensorflow.keras.models import load_model
 from PIL import Image
 import requests
+from ultralytics import YOLO
+import cv2
 
-# Cache the …
+# Cache the character recognition model
 @st.cache_resource
-def …
-    # Download the model from Hugging Face
+def load_character_model():
     url = "https://huggingface.co/krishnamishra8848/Devanagari_Character_Recognition/resolve/main/saved_model.keras"
     response = requests.get(url)
     with open("saved_model.keras", "wb") as f:
         f.write(response.content)
-
-    model = load_model("saved_model.keras")
-    return model
+    return load_model("saved_model.keras")
 
-# …
-…
+# Cache the YOLO detection model
+@st.cache_resource
+def load_detection_model():
+    weights_path = "https://huggingface.co/krishnamishra8848/Nepal-Vehicle-License-Plate-Detection/resolve/main/last.pt"
+    response = requests.get(weights_path)
+    with open("last.pt", "wb") as f:
+        f.write(response.content)
+    return YOLO("last.pt")
+
+# Load models
+character_model = load_character_model()
+detection_model = load_detection_model()
 
-# Nepali …
+# Nepali character mapping
 label_mapping = [
     "क", "ख", "ग", "घ", "ङ", "च", "छ", "ज", "झ", "ञ",
     "ट", "ठ", "ड", "ढ", "ण", "त", "थ", "द", "ध", "न",
@@ -29,27 +38,48 @@ label_mapping = [
 ]
 
 # Streamlit App
-st.title("…
-st.write("Upload an image …
+st.title("Bounding Box Text Recognition")
+st.write("Upload an image containing Devanagari text, and the model will detect bounding boxes and predict text.")
 
-# File uploader
+# File uploader
 uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
 
 if uploaded_file is not None:
     try:
-        # …
-        img = Image.open(uploaded_file).convert("…
-        …
+        # Load and preprocess the image
+        img = Image.open(uploaded_file).convert("RGB")
+        img_array = np.array(img)
+        img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)  # Convert to OpenCV format
+
+        # Detect bounding boxes with YOLO
+        results = detection_model(img_bgr)
+
+        # Initialize recognized text
+        recognized_text = ""
+
+        # Iterate through detected bounding boxes
+        for result in results:
+            if hasattr(result, 'boxes') and result.boxes is not None:
+                for box in result.boxes.xyxy:
+                    x1, y1, x2, y2 = map(int, box)  # Extract bounding box coordinates
+                    cropped_img = img_bgr[y1:y2, x1:x2]  # Crop the detected region
+
+                    # Preprocess the cropped image
+                    cropped_resized = cv2.resize(cropped_img, (32, 32), interpolation=cv2.INTER_AREA)
+                    cropped_gray = cv2.cvtColor(cropped_resized, cv2.COLOR_BGR2GRAY)
+                    cropped_normalized = cropped_gray.astype("float32") / 255.0
+                    cropped_input = cropped_normalized.reshape(1, 32, 32, 1)
+
+                    # Predict text for the cropped region
+                    prediction = character_model.predict(cropped_input)
+                    predicted_index = np.argmax(prediction)
+                    predicted_character = label_mapping[predicted_index]
+
+                    # Append to the recognized text
+                    recognized_text += predicted_character
+
+        # Display the recognized text
+        st.success(f"Recognized Text: {recognized_text}")
 
     except Exception as e:
         st.error(f"An error occurred: {e}")
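One sharp edge in both loader functions: the result of requests.get is never checked, so a failed download silently writes an HTML error page into the weights file and load_model later fails with a confusing message. A minimal hardening sketch, not part of this commit; the fetch_weights name, the timeout value, and the existence check are illustrative:

import os
import requests

def fetch_weights(url: str, dest: str) -> str:
    """Download weights once, failing loudly instead of saving an error page."""
    if not os.path.exists(dest):
        response = requests.get(url, timeout=120)
        response.raise_for_status()  # Surface 4xx/5xx here, not inside load_model()
        with open(dest, "wb") as f:
            f.write(response.content)
    return dest

With @st.cache_resource the download already runs only once per process; the os.path.exists guard additionally skips it across restarts that reuse the same disk.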
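The recognition loop concatenates characters in whatever order the detector emits boxes, and Ultralytics does not guarantee left-to-right order. If the output should read in visual order, the boxes can be sorted by their left edge first; a sketch under that assumption (the helper name is illustrative, the result.boxes.xyxy layout is the one the commit already relies on):

def boxes_left_to_right(result):
    """Return integer (x1, y1, x2, y2) tuples sorted by left edge."""
    if result.boxes is None:
        return []
    boxes = result.boxes.xyxy.tolist()  # [[x1, y1, x2, y2], ...] per detection
    boxes.sort(key=lambda b: b[0])      # smaller x1 first => left-to-right reading order
    return [tuple(int(v) for v in b) for b in boxes]

The inner loop then becomes "for x1, y1, x2, y2 in boxes_left_to_right(result):" with the rest of the body unchanged. Multi-line plates would additionally need row grouping (sort by y, then by x within each row).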
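The per-crop pipeline in the loop (resize to 32x32, grayscale, scale to [0, 1], reshape to a (1, 32, 32, 1) batch) is what the Keras classifier appears to expect. Factored into helpers, it can be tested without Streamlit or YOLO in the loop; a sketch with illustrative names:

import cv2
import numpy as np

def preprocess_crop(crop_bgr):
    """BGR crop -> (1, 32, 32, 1) float32 tensor in [0, 1]."""
    resized = cv2.resize(crop_bgr, (32, 32), interpolation=cv2.INTER_AREA)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    return (gray.astype("float32") / 255.0).reshape(1, 32, 32, 1)

def predict_character(model, crop_bgr, labels):
    """Classify one crop and map the argmax index to its Devanagari label."""
    probs = model.predict(preprocess_crop(crop_bgr), verbose=0)
    return labels[int(np.argmax(probs))]

Passing verbose=0 simply silences the per-call Keras progress bar, which otherwise floods the log once per detected box.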