Spaces:
Runtime error
Runtime error
fix frames as video - more time
Browse files
app.py
CHANGED
@@ -22,7 +22,8 @@ class ChaplinGradio:
|
|
22 |
|
23 |
# Frame buffer
|
24 |
self.frame_buffer = []
|
25 |
-
self.
|
|
|
26 |
|
27 |
def download_models(self):
|
28 |
"""Download required model files from HuggingFace"""
|
@@ -65,7 +66,7 @@ class ChaplinGradio:
|
|
65 |
current_time = time.time()
|
66 |
|
67 |
if current_time - self.last_frame_time < self.frame_interval:
|
68 |
-
return
|
69 |
|
70 |
self.last_frame_time = current_time
|
71 |
|
@@ -80,8 +81,8 @@ class ChaplinGradio:
|
|
80 |
# Add frame to buffer
|
81 |
self.frame_buffer.append(frame)
|
82 |
|
83 |
-
#
|
84 |
-
if len(self.frame_buffer) >= self.
|
85 |
# Create temp directory if it doesn't exist
|
86 |
os.makedirs("temp", exist_ok=True)
|
87 |
|
@@ -105,13 +106,15 @@ class ChaplinGradio:
|
|
105 |
out.write(f)
|
106 |
out.release()
|
107 |
|
108 |
-
# Clear buffer
|
109 |
-
self.frame_buffer = []
|
110 |
|
111 |
try:
|
112 |
# Process the video file using the pipeline
|
113 |
predicted_text = self.vsr_model(temp_video)
|
114 |
-
|
|
|
|
|
115 |
|
116 |
except Exception as e:
|
117 |
print(f"Error during inference: {str(e)}")
|
@@ -121,7 +124,7 @@ class ChaplinGradio:
|
|
121 |
if os.path.exists(temp_video):
|
122 |
os.remove(temp_video)
|
123 |
|
124 |
-
return "
|
125 |
|
126 |
except Exception as e:
|
127 |
print(f"Error processing: {str(e)}")
|
@@ -134,9 +137,9 @@ chaplin = ChaplinGradio()
|
|
134 |
iface = gr.Interface(
|
135 |
fn=chaplin.process_frame,
|
136 |
inputs=gr.Image(sources=["webcam"], streaming=True),
|
137 |
-
outputs=gr.Textbox(label="Predicted Text"),
|
138 |
title="Chaplin - Live Visual Speech Recognition",
|
139 |
-
description="
|
140 |
live=True
|
141 |
)
|
142 |
|
|
|
22 |
|
23 |
# Frame buffer
|
24 |
self.frame_buffer = []
|
25 |
+
self.min_frames = 32 # 2 seconds of video at 16 fps
|
26 |
+
self.last_prediction = ""
|
27 |
|
28 |
def download_models(self):
|
29 |
"""Download required model files from HuggingFace"""
|
|
|
66 |
current_time = time.time()
|
67 |
|
68 |
if current_time - self.last_frame_time < self.frame_interval:
|
69 |
+
return self.last_prediction
|
70 |
|
71 |
self.last_frame_time = current_time
|
72 |
|
|
|
81 |
# Add frame to buffer
|
82 |
self.frame_buffer.append(frame)
|
83 |
|
84 |
+
# Process when we have enough frames
|
85 |
+
if len(self.frame_buffer) >= self.min_frames:
|
86 |
# Create temp directory if it doesn't exist
|
87 |
os.makedirs("temp", exist_ok=True)
|
88 |
|
|
|
106 |
out.write(f)
|
107 |
out.release()
|
108 |
|
109 |
+
# Clear buffer but keep last few frames for continuity
|
110 |
+
self.frame_buffer = self.frame_buffer[-8:] # Keep last 0.5 seconds
|
111 |
|
112 |
try:
|
113 |
# Process the video file using the pipeline
|
114 |
predicted_text = self.vsr_model(temp_video)
|
115 |
+
if predicted_text:
|
116 |
+
self.last_prediction = predicted_text
|
117 |
+
return self.last_prediction
|
118 |
|
119 |
except Exception as e:
|
120 |
print(f"Error during inference: {str(e)}")
|
|
|
124 |
if os.path.exists(temp_video):
|
125 |
os.remove(temp_video)
|
126 |
|
127 |
+
return self.last_prediction or "Waiting for speech..."
|
128 |
|
129 |
except Exception as e:
|
130 |
print(f"Error processing: {str(e)}")
|
|
|
137 |
iface = gr.Interface(
|
138 |
fn=chaplin.process_frame,
|
139 |
inputs=gr.Image(sources=["webcam"], streaming=True),
|
140 |
+
outputs=gr.Textbox(label="Predicted Text", interactive=False),
|
141 |
title="Chaplin - Live Visual Speech Recognition",
|
142 |
+
description="Speak clearly into the webcam. The model will process your speech in ~2 second chunks.",
|
143 |
live=True
|
144 |
)
|
145 |
|