Update app.py
app.py CHANGED
@@ -18,8 +18,8 @@ def process_input(uploaded_file, youtube_link, image_url, sensitivity):
     Priority: YouTube link > Image URL > Uploaded file.
     The sensitivity slider value is passed as the confidence threshold.
 
-    For video files (mp4, mov, avi, webm), we
-    For images, we use
+    For video files (mp4, mov, avi, webm), we process the video frame-by-frame
+    using OpenCV. For images, we use normal prediction.
 
     Returns a tuple:
       - download_file_path (for gr.File)
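The docstring's input priority (YouTube link > Image URL > Uploaded file) would typically be resolved with a short cascade like the sketch below. This is only an illustration: resolve_input and the two download helpers are hypothetical names, not functions shown in this diff.

# Hypothetical sketch of the documented input priority; the helper names are
# placeholders and the real app.py may resolve its inputs differently.
def resolve_input(uploaded_file, youtube_link, image_url):
    if youtube_link and youtube_link.strip():
        return download_youtube_video(youtube_link)    # hypothetical helper
    if image_url and image_url.strip():
        return download_image_from_url(image_url)      # hypothetical helper
    if uploaded_file is not None:
        return uploaded_file.name                      # gr.File exposes a temp file path
    return None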
@@ -65,26 +65,28 @@ def process_input(uploaded_file, youtube_link, image_url, sensitivity):
     output_path = None
 
     if ext_input in video_exts:
-        # Process video using
+        # Process video frame-by-frame using OpenCV.
         try:
-            # Open video to get properties.
             cap = cv2.VideoCapture(input_path)
             if not cap.isOpened():
                 return None, None, None, "Error opening video file."
             fps = cap.get(cv2.CAP_PROP_FPS)
             width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
             height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            cap.release()
-
-            # Use streaming mode to process each frame.
             frames = []
-
-
-
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Run detection on the frame.
+                # Note: model.predict() accepts an image (numpy array) as source.
+                result = model.predict(source=frame, conf=sensitivity)[0]
+                annotated_frame = result.plot()  # returns an annotated frame (numpy array)
                 frames.append(annotated_frame)
+            cap.release()
             if not frames:
-                return None, None, None, "No detections were returned from video
+                return None, None, None, "No detections were returned from video processing."
-            # Write frames to a temporary video file.
+            # Write annotated frames to a temporary video file.
             temp_video_path = os.path.join(tempfile.gettempdir(), "annotated_video.mp4")
             fourcc = cv2.VideoWriter_fourcc(*'mp4v')
             out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))
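Read end-to-end, the new video branch opens the clip, runs the detector on every frame, collects the annotated frames, and later writes them out with cv2.VideoWriter. The sketch below is a hedged restatement of that flow, assuming an Ultralytics-style model whose predict() results expose .plot(), as the diff suggests. The function name annotate_video is illustrative, and it writes each frame to the VideoWriter as it goes rather than buffering a frames list, a deliberate variation that keeps memory use bounded for long videos.

import os
import tempfile

import cv2

def annotate_video(model, input_path, sensitivity):
    # Open the source video and read its basic properties.
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError("Error opening video file.")
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # guard against an unreadable FPS
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Write annotated frames straight to a temporary mp4.
    temp_video_path = os.path.join(tempfile.gettempdir(), "annotated_video.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # model.predict() accepts a numpy array; [0] takes the single result.
        result = model.predict(source=frame, conf=sensitivity)[0]
        out.write(result.plot())  # plot() returns the annotated frame (BGR array)

    cap.release()
    out.release()
    return temp_video_path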
@@ -100,7 +102,6 @@ def process_input(uploaded_file, youtube_link, image_url, sensitivity):
             results = model.predict(source=input_path, save=True, conf=sensitivity)
         except Exception as e:
             return None, None, None, f"Error running prediction: {e}"
-
         try:
             if not results or len(results) == 0:
                 return None, None, None, "No detections were returned."
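The image branch calls model.predict(source=input_path, save=True, conf=sensitivity), letting the library save its own annotated copy. A hedged alternative is to render the annotation in memory with result.plot() (the same call the video branch now uses) and write it to a path the app controls; annotate_image is an illustrative name, not code from app.py.

import os
import tempfile

import cv2

def annotate_image(model, input_path, sensitivity):
    # Run a single prediction and draw the detections ourselves.
    results = model.predict(source=input_path, conf=sensitivity)
    if not results:
        return None, "No detections were returned."
    annotated = results[0].plot()  # annotated image as a BGR numpy array
    output_path = os.path.join(tempfile.gettempdir(), "annotated_image.jpg")
    cv2.imwrite(output_path, annotated)
    return output_path, "Done."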
@@ -147,7 +148,7 @@ with gr.Blocks(css="""
                 youtube_input = gr.Textbox(label="YouTube Link", placeholder="https://...")
             with gr.TabItem("Image URL"):
                 image_url_input = gr.Textbox(label="Image URL", placeholder="https://...")
-            sensitivity_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.
+            sensitivity_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.15,
                                            label="Sensitivity (Confidence Threshold)")
         # Right Column: Results display at the top.
         with gr.Column(scale=2):
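For context, the slider's value reaches process_input through whatever Gradio event handler the app registers; the sketch below shows one plausible wiring. Only the slider, the two textboxes, the gr.File download output, and the 4-tuple return value are visible in this diff; the button and the remaining components are assumptions.

import gradio as gr

with gr.Blocks() as demo:
    file_input = gr.File(label="Upload File")                     # assumed component
    youtube_input = gr.Textbox(label="YouTube Link", placeholder="https://...")
    image_url_input = gr.Textbox(label="Image URL", placeholder="https://...")
    sensitivity_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.15,
                                   label="Sensitivity (Confidence Threshold)")
    run_button = gr.Button("Run Detection")                       # assumed name
    download_file = gr.File(label="Download Annotated Output")
    image_output = gr.Image(label="Annotated Image")              # assumed output
    video_output = gr.Video(label="Annotated Video")              # assumed output
    status_box = gr.Textbox(label="Status")                       # assumed output

    # The slider's current value is passed as the `sensitivity` argument,
    # which process_input forwards to model.predict(conf=...).
    run_button.click(
        fn=process_input,   # the function changed in this diff
        inputs=[file_input, youtube_input, image_url_input, sensitivity_slider],
        outputs=[download_file, image_output, video_output, status_box],
    )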