import gradio as gr
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp

# Load the label-to-letter mapping
from config import label_to_alphabet  # Ensure this file has the correct mapping

# Load the saved ASL model
model = load_model("model/asl_model.h5")

# Initialize MediaPipe for hand detection
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils  # For drawing hand landmarks


def detect_and_crop_hand(image):
    """
    Detect the hand in the image, crop that region, and return the cropped hand image.
    Returns None if no hand is detected.
    """
    # Gradio delivers frames as RGB arrays, which is what MediaPipe expects,
    # so no BGR-to-RGB conversion is needed here.
    results = hands.process(image)

    if results.multi_hand_landmarks:
        # max_num_hands=1, so at most one set of landmarks is returned
        hand_landmarks = results.multi_hand_landmarks[0]

        # Get the image dimensions
        h, w, _ = image.shape
        x_min, y_min = w, h
        x_max = y_max = 0

        # Loop through the landmarks to determine the hand's bounding box
        for landmark in hand_landmarks.landmark:
            x, y = int(landmark.x * w), int(landmark.y * h)
            x_min = min(x, x_min)
            y_min = min(y, y_min)
            x_max = max(x, x_max)
            y_max = max(y, y_max)

        # Clamp the box to the image bounds (landmarks can fall slightly outside the frame)
        x_min, y_min = max(x_min, 0), max(y_min, 0)
        x_max, y_max = min(x_max, w), min(y_max, h)

        # Crop the hand region from the image
        cropped_hand = image[y_min:y_max, x_min:x_max]

        # Guard against a degenerate (empty) crop
        if cropped_hand.size == 0:
            return None

        # Optional: draw the landmarks on the original image for debugging
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        return cropped_hand

    # No hand detected
    return None


def preprocess_hand_image(hand_image):
    """
    Preprocess the cropped hand image for the ASL recognition model:
    resize, normalize, and add a batch dimension.
    """
    # Resize to the model's input size (150x150 here; adjust to your model)
    hand_image_resized = cv2.resize(hand_image, (150, 150))

    # Normalize pixel values to [0, 1]
    hand_image_normalized = hand_image_resized / 255.0

    # Add a batch dimension so the shape becomes (1, 150, 150, 3)
    hand_image_reshaped = np.expand_dims(hand_image_normalized, axis=0)

    return hand_image_reshaped


def predict_asl_alphabet(cropped_hand):
    """
    Feed the cropped hand image into the ASL recognition model and
    return the predicted letter.
    """
    # Preprocess the hand image
    processed_hand = preprocess_hand_image(cropped_hand)

    # Run inference with the ASL model
    predictions = model.predict(processed_hand)

    # Take the class index with the highest predicted probability
    predicted_label = np.argmax(predictions[0])

    # Map the class index to its letter
    predicted_alphabet = label_to_alphabet[predicted_label]

    return predicted_alphabet


def process_video_frame(image):
    """
    Gradio interface function: detect and crop the hand from a webcam frame,
    then predict the ASL letter.
    """
    cropped_hand = detect_and_crop_hand(image)
    if cropped_hand is None:
        return "No hand detected"

    # Predict the ASL letter from the cropped hand image
    return predict_asl_alphabet(cropped_hand)


# Gradio interface setup
iface = gr.Interface(
    fn=process_video_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),  # Webcam input
    outputs="text",                                       # Display the predicted letter
    live=True,                                            # Re-run on each new frame
    description="Real-Time ASL Hand Gesture Recognition",
)

# Launch the Gradio app
iface.launch()
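
# --- Sketch of the imported config module (an assumption; config.py is not shown above) ---
# The script imports label_to_alphabet from config but never defines it. A minimal
# config.py might look like the commented sketch below, assuming the model was trained
# on the common 29-class ASL alphabet dataset (A-Z plus "space", "del", "nothing")
# with classes indexed in alphabetical order. Adjust the mapping to match the label
# order actually used during training.
#
#   # config.py
#   import string
#
#   # Map class index to letter: 0 -> "A", ..., 25 -> "Z" (assumed label order)
#   label_to_alphabet = {i: letter for i, letter in enumerate(string.ascii_uppercase)}
#
#   # Extra classes, if your dataset includes them (assumed indices)
#   label_to_alphabet.update({26: "space", 27: "del", 28: "nothing"})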