import gradio as gr
import cv2
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp
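# Runtime dependencies (assumed): gradio, opencv-python, numpy, tensorflow, mediapipe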

# Load the label-to-letter mapping used to decode model predictions
from config import label_to_alphabet  # Ensure this file has the correct mapping
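# A minimal sketch of the expected mapping, assuming 26 letter classes; the
# actual dictionary lives in config.py and may differ:
# label_to_alphabet = {0: "A", 1: "B", 2: "C", ..., 25: "Z"}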

# Load the saved ASL model
model = load_model("model/asl_model.h5")
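# Assumption: the model takes a (1, 150, 150, 3) batch of pixels scaled to
# [0, 1] and outputs one probability per letter class (see preprocessing below)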

# Initialize MediaPipe for hand detection
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils  # For hand landmark drawing

def detect_and_crop_hand(image):
    """
    Detect the hand in the image, crop the region, and return the cropped hand image.
    """
    # Gradio supplies frames as RGB NumPy arrays, which is the channel order
    # MediaPipe expects, so the frame is processed directly (a BGR-to-RGB
    # cv2.cvtColor here would wrongly swap the channels for this input)
    results = hands.process(image)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get the image dimensions
            h, w, _ = image.shape
            x_min = w
            y_min = h
            x_max = y_max = 0

            # Loop through landmarks to determine the bounding box for the hand
            for landmark in hand_landmarks.landmark:
                x, y = int(landmark.x * w), int(landmark.y * h)
                x_min = min(x, x_min)
                y_min = min(y, y_min)
                x_max = max(x, x_max)
                y_max = max(y, y_max)

            # Clamp the box to the frame (landmarks can fall slightly outside it)
            x_min, y_min = max(x_min, 0), max(y_min, 0)
            x_max, y_max = min(x_max, w), min(y_max, h)

            # Guard against a degenerate box that would yield an empty crop
            if x_max <= x_min or y_max <= y_min:
                return None

            # Copy the crop: a plain NumPy slice shares memory with `image`,
            # so drawing landmarks below would otherwise bleed into the crop
            cropped_hand = image[y_min:y_max, x_min:x_max].copy()

            # Optional: draw the landmarks on the original image for debugging
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            return cropped_hand

    # If no hand is detected, return None
    return None

def preprocess_hand_image(hand_image):
    """
    Preprocess the cropped hand image for the ASL recognition model.
    This involves resizing, normalizing, and reshaping the image.
    """
    # Resize the image to 150x150 pixels (or your model's input size)
    hand_image_resized = cv2.resize(hand_image, (150, 150))
    
    # Normalize the image (scale pixel values to [0, 1]); float32 matches the
    # model's input dtype and avoids an unnecessary float64 copy
    hand_image_normalized = hand_image_resized.astype("float32") / 255.0
    
    # Reshape the image to match the model's expected input shape (1, 150, 150, 3)
    hand_image_reshaped = np.expand_dims(hand_image_normalized, axis=0)
    
    return hand_image_reshaped
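
# Quick sanity check (hypothetical): a blank 200x200 uint8 image should come
# back batched as (1, 150, 150, 3):
# assert preprocess_hand_image(np.zeros((200, 200, 3), np.uint8)).shape == (1, 150, 150, 3)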

def predict_asl_alphabet(cropped_hand):
    """
    Feed the cropped hand image into the ASL recognition model and return the predicted alphabet.
    """
    # Preprocess the hand image
    processed_hand = preprocess_hand_image(cropped_hand)
    
    # Run inference (verbose=0 keeps Keras from printing a progress bar on
    # every streamed frame)
    predictions = model.predict(processed_hand, verbose=0)
    
    # Get the predicted label (the index of the highest predicted probability)
    predicted_label = np.argmax(predictions[0])
    
    # Map the label index to the corresponding letter (cast to a plain int in
    # case the mapping is keyed by Python ints)
    predicted_alphabet = label_to_alphabet[int(predicted_label)]
    
    return predicted_alphabet

# Gradio interface function
def process_video_frame(image):
    """
    Process the webcam feed to detect, crop the hand, and predict the ASL alphabet.
    """
    # Detect and crop the hand from the image
    cropped_hand = detect_and_crop_hand(image)
    
    if cropped_hand is None:
        return "No hand detected"
    
    # Predict the ASL alphabet using the cropped hand image
    predicted_alphabet = predict_asl_alphabet(cropped_hand)
    
    return predicted_alphabet  # Return the predicted alphabet

# Gradio interface setup
iface = gr.Interface(
    fn=process_video_frame, 
    inputs=gr.Image(sources=["webcam"], streaming=True),  # Webcam input
    outputs="text",  # Display the predicted alphabet
    live=True,  # Enable live video streaming
    description="Real-Time ASL Hand Gesture Recognition"
)

# Launch the Gradio app
iface.launch()
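
# Optional: launch(share=True) would also serve a temporary public URL through
# Gradio's tunneling service (assumes outbound network access is allowed).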