Spaces:

willwade
/

chaplinDemo

Runtime error

App Files Files Community

willwade commited on Feb 3

Commit

5b30a24

verified ·

1 Parent(s): 9a1700f

Create app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import gradio as gr
+import cv2
+import torch
+from pipelines.pipeline import InferencePipeline
+import time
+class ChaplinGradio:
+    def __init__(self):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.vsr_model = None
+        self.load_models()
+        # Video params
+        self.fps = 16
+        self.frame_interval = 1 / self.fps
+        self.frame_compression = 25
+        self.last_frame_time = time.time()
+    def load_models(self):
+        """Load models using the InferencePipeline with HF Space defaults"""
+        config = {
+            "model": {
+                "name": "chaplin_vsr",
+                "weights": "models/chaplin_vsr.pth",
+                "detector": "mediapipe"
+            }
+        }
+        self.vsr_model = InferencePipeline(
+            config,
+            device=self.device,
+            detector="mediapipe",
+            face_track=True
+        )
+        print("Model loaded successfully!")
+    def process_frame(self, frame):
+        """Process a single frame with rate limiting and compression"""
+        current_time = time.time()
+        if current_time - self.last_frame_time < self.frame_interval:
+            return None
+        self.last_frame_time = current_time
+        if frame is None:
+            return "No video input detected"
+        # Compress frame
+        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), self.frame_compression]
+        _, buffer = cv2.imencode('.jpg', frame, encode_param)
+        compressed_frame = cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE)
+        # Run inference using the VSR model
+        predicted_text = self.vsr_model.process_frame(compressed_frame)
+        return predicted_text
+# Create Gradio interface
+chaplin = ChaplinGradio()
+iface = gr.Interface(
+    fn=chaplin.process_frame,
+    inputs=gr.Image(source="webcam", streaming=True),
+    outputs=gr.Textbox(label="Predicted Text"),
+    title="Chaplin - Live Visual Speech Recognition",
+    description="Use your webcam to perform real-time visual speech recognition.",
+    live=True
+)
+if __name__ == "__main__":
+    iface.launch()