Haryiank committed on
Commit
4acc044
·
verified ·
1 Parent(s): 35be27c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import cv2
4
+ from transformers import AutoImageProcessor, SiglipForImageClassification
5
+ from collections import Counter
6
+
7
# Checkpoint identifier shared by the classifier and its image processor.
model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"

# Load both pieces once at import time so every request reuses them.
model = SiglipForImageClassification.from_pretrained(model_name)
processor = AutoImageProcessor.from_pretrained(model_name)
11
+
12
def predict_from_video(video_path):
    """Classify every frame of a video and report the most frequent label.

    Each decoded frame is converted from BGR to RGB, run through the
    module-level ``processor`` and ``model``, and the arg-max class label
    is tallied. The label with the highest tally wins; ties resolve in
    favor of the label that was seen first.

    Args:
        video_path: Path of the video file to analyze. ``None`` or an
            empty value (for example when no video was supplied) is
            treated as "nothing to process" instead of raising.

    Returns:
        ``"Predicted Letter: <label> (appeared <n> times)"`` when at least
        one frame was classified, otherwise ``"No frames processed."``.
    """
    # Nothing to open: answer with the same message as an unreadable video.
    if not video_path:
        return "No frames processed."

    # Local import: the file's top-level imports do not bring torch in.
    import torch

    votes = Counter()
    cap = cv2.VideoCapture(video_path)
    try:
        # Inference only; disabling autograd avoids building a gradient
        # graph for every frame.
        with torch.no_grad():
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert frame to RGB before handing it to the processor.
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                inputs = processor(images=img, return_tensors="pt")
                logits = model(**inputs).logits

                # Softmax is monotonic, so the arg-max of the raw logits
                # already identifies the most probable class.
                idx = int(logits.argmax(dim=-1)[0])
                votes[model.config.id2label[idx]] += 1
    finally:
        # Always free the capture handle, even if a frame fails mid-way.
        cap.release()

    # Majority vote
    if not votes:
        return "No frames processed."

    letter, count = votes.most_common(1)[0]
    return f"Predicted Letter: {letter} (appeared {count} times)"
41
+
42
# Web UI: one video input, one text output, backed by predict_from_video.
iface = gr.Interface(
    predict_from_video,
    gr.Video(),  # gr.Video takes no 'type' argument in Gradio 5.x
    "text",
    title="ASL Alphabet Recognition from Video",
    description=(
        "Upload a short video of your ASL sign (A–Z). "
        "The system will analyze frames and predict the most likely letter."
    ),
)

# Start serving the interface.
iface.launch()