import gradio as gr
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp
# Load your label-to-alphabet mapping
from config import label_to_alphabet  # Ensure this file has the correct mapping
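# Expected shape of the mapping (an assumption; adjust to match your config.py):
# label_to_alphabet = {0: "A", 1: "B", 2: "C", ..., 25: "Z"}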
# Load the saved ASL model
model = load_model("model/asl_model.h5")
# Initialize MediaPipe for hand detection
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils # For hand landmark drawing
def detect_and_crop_hand(image):
    """
    Detect the hand in the image, crop that region, and return the cropped hand image.
    """
    # Convert the image to RGB format (required by MediaPipe)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Detect hand landmarks
    results = hands.process(rgb_image)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get the image dimensions
            h, w, _ = image.shape
            x_min = w
            y_min = h
            x_max = y_max = 0
            # Loop through landmarks to determine the bounding box for the hand
            for landmark in hand_landmarks.landmark:
                x, y = int(landmark.x * w), int(landmark.y * h)
                x_min = min(x, x_min)
                y_min = min(y, y_min)
                x_max = max(x, x_max)
                y_max = max(y, y_max)
            # Clamp the box to the image bounds; landmark coordinates can fall
            # slightly outside the frame
            x_min, y_min = max(0, x_min), max(0, y_min)
            x_max, y_max = min(w, x_max), min(h, y_max)
            if x_max <= x_min or y_max <= y_min:
                return None  # Degenerate box; treat as no detection
            # Crop the hand portion from the image
            cropped_hand = image[y_min:y_max, x_min:x_max]
            # Optional: draw the landmarks on the original image for debugging
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            return cropped_hand
    # If no hand is detected, return None
    return None
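# Example usage on a static image (commented out so the app doesn't run it;
# "hand.jpg" is a hypothetical local test file):
# frame = cv2.imread("hand.jpg")
# crop = detect_and_crop_hand(frame)
# if crop is not None:
#     cv2.imwrite("hand_crop.jpg", crop)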
def preprocess_hand_image(hand_image):
    """
    Preprocess the cropped hand image for the ASL recognition model.
    This involves resizing, normalizing, and reshaping the image.
    """
    # Resize the image to 150x150 pixels (the model's expected input size)
    hand_image_resized = cv2.resize(hand_image, (150, 150))
    # Normalize the image (scale pixel values to [0, 1])
    hand_image_normalized = hand_image_resized / 255.0
    # Add a batch dimension to match the model's input shape (1, 150, 150, 3)
    hand_image_reshaped = np.expand_dims(hand_image_normalized, axis=0)
    return hand_image_reshaped
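# Note: the 150x150 RGB / [0, 1] preprocessing above is assumed from the model's
# input layer; it must match whatever preprocessing was used at training time.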
def predict_asl_alphabet(cropped_hand):
    """
    Feed the cropped hand image into the ASL recognition model and return the predicted alphabet.
    """
    # Preprocess the hand image
    processed_hand = preprocess_hand_image(cropped_hand)
    # Make the prediction using the ASL model
    predictions = model.predict(processed_hand)
    # Get the predicted label (the index of the highest predicted probability)
    predicted_label = np.argmax(predictions[0])
    # Map the label to the corresponding alphabet
    predicted_alphabet = label_to_alphabet[predicted_label]
    return predicted_alphabet
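# Optional sketch (not part of the original app): the same prediction with a
# confidence threshold, so low-confidence frames are flagged rather than
# guessed. The 0.6 cutoff is an assumption; tune it for your model.
def predict_asl_alphabet_with_confidence(cropped_hand, threshold=0.6):
    processed_hand = preprocess_hand_image(cropped_hand)
    predictions = model.predict(processed_hand)
    predicted_label = int(np.argmax(predictions[0]))
    confidence = float(predictions[0][predicted_label])
    if confidence < threshold:
        return f"Uncertain ({confidence:.2f})"
    return f"{label_to_alphabet[predicted_label]} ({confidence:.2f})"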
# Gradio interface function
def process_video_frame(image):
    """
    Process a webcam frame: detect and crop the hand, then predict the ASL alphabet.
    """
    # Gradio supplies frames as RGB arrays; convert to BGR so the OpenCV-style
    # pipeline above sees the channel order it expects (an assumption, valid if
    # the model was trained on cv2-loaded, i.e. BGR, images)
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # Detect and crop the hand from the frame
    cropped_hand = detect_and_crop_hand(bgr_image)
    if cropped_hand is None:
        return "No hand detected"
    # Predict the ASL alphabet using the cropped hand image
    predicted_alphabet = predict_asl_alphabet(cropped_hand)
    return predicted_alphabet
# Gradio interface setup
iface = gr.Interface(
    fn=process_video_frame,
    inputs=gr.Image(sources=["webcam"], streaming=True),  # Webcam input
    outputs="text",  # Display the predicted alphabet
    live=True,  # Enable live video streaming
    description="Real-Time ASL Hand Gesture Recognition",
)
# Launch the Gradio app
iface.launch()
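# To share the app outside localhost, Gradio also supports
# iface.launch(share=True) for a temporary public link, or
# iface.launch(server_name="0.0.0.0") to serve on the local network.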