tstone87 committed on
Commit
7aa805e
·
verified ·
1 Parent(s): 30fd2ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -82
app.py CHANGED
@@ -6,129 +6,120 @@ import cv2
6
  import requests
7
  from ultralytics import YOLO
8
 
9
- # Remove extra CLI arguments that Spaces might pass.
10
  sys.argv = [arg for arg in sys.argv if arg != "--import"]
11
 
12
- # Load the YOLO11-pose model (auto-downloads if needed)
13
  model = YOLO("yolo11n-pose.pt")
14
 
15
  def process_input(uploaded_file, youtube_link, image_url, sensitivity):
16
  """
17
- Process input from one of three methods (Upload, YouTube, Image URL).
18
  Priority: YouTube link > Image URL > Uploaded file.
19
- The sensitivity slider value is passed as the confidence threshold.
20
-
21
- For video files (mp4, mov, avi, webm), we process the video frame-by-frame
22
- using OpenCV. For images, we use normal prediction.
23
-
24
- Returns a tuple:
25
- - download_file_path (for gr.File)
26
- - image_result (for gr.Image) or None
27
- - video_result (for gr.Video) or None
28
- - status message
29
  """
30
  input_path = None
 
31
 
32
  # Priority 1: YouTube link
33
  if youtube_link and youtube_link.strip():
34
  try:
35
- from pytube import YouTube
36
  yt = YouTube(youtube_link)
37
  stream = yt.streams.filter(file_extension='mp4', progressive=True).order_by("resolution").desc().first()
38
- if stream is None:
39
  return None, None, None, "No suitable mp4 stream found."
40
- input_path = stream.download()
 
 
 
41
  except Exception as e:
42
- return None, None, None, f"Error downloading video: {e}"
 
43
  # Priority 2: Image URL
44
  elif image_url and image_url.strip():
45
  try:
46
- response = requests.get(image_url, stream=True)
47
- if response.status_code != 200:
48
- return None, None, None, f"Error downloading image: HTTP {response.status_code}"
49
- temp_image_path = os.path.join(tempfile.gettempdir(), "downloaded_image.jpg")
50
- with open(temp_image_path, "wb") as f:
51
  f.write(response.content)
52
- input_path = temp_image_path
 
53
  except Exception as e:
54
- return None, None, None, f"Error downloading image: {e}"
 
55
  # Priority 3: Uploaded file
56
  elif uploaded_file is not None:
57
  input_path = uploaded_file.name
58
  else:
59
- return None, None, None, "Please provide an input using one of the methods."
60
 
61
- # Determine if input is a video (by extension).
62
- ext_input = os.path.splitext(input_path)[1].lower()
63
  video_exts = [".mp4", ".mov", ".avi", ".webm"]
64
-
65
  output_path = None
66
 
67
- if ext_input in video_exts:
68
- # Process video frame-by-frame using OpenCV.
69
- try:
70
  cap = cv2.VideoCapture(input_path)
71
  if not cap.isOpened():
72
  return None, None, None, "Error opening video file."
 
73
  fps = cap.get(cv2.CAP_PROP_FPS)
74
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
75
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
76
- frames = []
 
 
 
 
 
77
  while True:
78
  ret, frame = cap.read()
79
  if not ret:
80
  break
81
- # Run detection on the frame.
82
- # Note: model.predict() accepts an image (numpy array) as source.
83
- result = model.predict(source=frame, conf=sensitivity)[0]
84
- annotated_frame = result.plot() # returns an annotated frame (numpy array)
85
- frames.append(annotated_frame)
 
 
 
 
86
  cap.release()
87
- if not frames:
88
- return None, None, None, "No detections were returned from video processing."
89
- # Write annotated frames to a temporary video file.
90
- temp_video_path = os.path.join(tempfile.gettempdir(), "annotated_video.mp4")
91
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
92
- out = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))
93
- for frame in frames:
94
- out.write(frame)
95
  out.release()
96
- output_path = temp_video_path
97
- except Exception as e:
98
- return None, None, None, f"Error processing video: {e}"
99
- else:
100
- # Process as an image.
101
- try:
102
- results = model.predict(source=input_path, save=True, conf=sensitivity)
103
- except Exception as e:
104
- return None, None, None, f"Error running prediction: {e}"
105
- try:
106
- if not results or len(results) == 0:
107
- return None, None, None, "No detections were returned."
108
- if hasattr(results[0], "save_path"):
109
- output_path = results[0].save_path
110
- else:
111
- annotated = results[0].plot() # returns a numpy array
112
- output_path = os.path.join(tempfile.gettempdir(), "annotated.jpg")
113
- cv2.imwrite(output_path, annotated)
114
- except Exception as e:
115
- return None, None, None, f"Error processing the file: {e}"
116
 
117
- # Clean up temporary input if downloaded.
118
- if ((youtube_link and youtube_link.strip()) or (image_url and image_url.strip())) and input_path and os.path.exists(input_path):
119
- os.remove(input_path)
 
 
 
 
 
120
 
121
- # Set outputs based on output file extension.
122
- ext_output = os.path.splitext(output_path)[1].lower()
123
- if ext_output in video_exts:
124
- image_result = None
125
- video_result = output_path
126
- else:
127
- image_result = output_path
128
- video_result = None
129
 
130
- return output_path, image_result, video_result, "Success!"
 
 
 
 
 
 
 
131
 
 
132
  with gr.Blocks(css="""
133
  .result_img > img {
134
  width: 100%;
@@ -137,7 +128,6 @@ with gr.Blocks(css="""
137
  }
138
  """) as demo:
139
  with gr.Row():
140
- # Left Column: Header image, title, input tabs, and sensitivity slider.
141
  with gr.Column(scale=1):
142
  gr.HTML("<div style='text-align:center;'><img src='https://huggingface.co/spaces/tstone87/stance-detection/resolve/main/crowdresult.jpg' style='width:25%;'/></div>")
143
  gr.Markdown("## Pose Detection with YOLO11-pose")
@@ -148,9 +138,8 @@ with gr.Blocks(css="""
148
  youtube_input = gr.Textbox(label="YouTube Link", placeholder="https://...")
149
  with gr.TabItem("Image URL"):
150
  image_url_input = gr.Textbox(label="Image URL", placeholder="https://...")
151
- sensitivity_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.15,
152
- label="Sensitivity (Confidence Threshold)")
153
- # Right Column: Results display at the top.
154
  with gr.Column(scale=2):
155
  output_image = gr.Image(label="Annotated Output (Image)", elem_classes="result_img")
156
  output_video = gr.Video(label="Annotated Output (Video)")
@@ -174,4 +163,4 @@ with gr.Blocks(css="""
174
  )
175
 
176
  if __name__ == "__main__":
177
- demo.launch()
 
6
  import requests
7
  from ultralytics import YOLO
8
 
9
# Strip extra CLI arguments that Hugging Face Spaces may inject
sys.argv = [arg for arg in sys.argv if arg != "--import"]

# Load the YOLO11-pose model (Ultralytics downloads the weights on first use)
model = YOLO("yolo11n-pose.pt")
14
 
15
def process_input(uploaded_file, youtube_link, image_url, sensitivity):
    """Run YOLO pose detection on a single input source.

    Exactly one source is used, in priority order:
    YouTube link > Image URL > Uploaded file.

    Args:
        uploaded_file: Gradio file object (exposes a ``.name`` path) or None.
        youtube_link: YouTube URL string; may be empty/None.
        image_url: Direct image URL string; may be empty/None.
        sensitivity: Confidence threshold forwarded to ``model.predict``.

    Returns:
        Tuple ``(download_path, image_result, video_result, status)``.
        On success exactly one of ``image_result`` / ``video_result`` is a
        path and the other is None; on failure all three paths are None and
        ``status`` describes the error.
    """
    input_path = None
    # Temp *inputs* this function downloaded and must delete afterwards.
    # Outputs are tracked separately (in output_path) and are kept so Gradio
    # can serve them. The previous temp_files[:-1] scheme leaked the input
    # whenever processing failed before an output was appended.
    downloaded_inputs = []

    # Priority 1: YouTube link.
    if youtube_link and youtube_link.strip():
        try:
            from pytubefix import YouTube  # pytubefix: maintained fork of pytube
            yt = YouTube(youtube_link)
            stream = (yt.streams
                        .filter(file_extension='mp4', progressive=True)
                        .order_by("resolution").desc().first())
            if not stream:
                return None, None, None, "No suitable mp4 stream found."
            # Random name avoids collisions between concurrent requests.
            temp_path = os.path.join(tempfile.gettempdir(), f"yt_{os.urandom(8).hex()}.mp4")
            stream.download(output_path=tempfile.gettempdir(),
                            filename=os.path.basename(temp_path))
            input_path = temp_path
            downloaded_inputs.append(input_path)
        except Exception as e:
            return None, None, None, f"Error downloading YouTube video: {str(e)}"

    # Priority 2: Image URL.
    elif image_url and image_url.strip():
        try:
            response = requests.get(image_url, stream=True, timeout=10)
            response.raise_for_status()  # turn HTTP errors into exceptions
            temp_path = os.path.join(tempfile.gettempdir(), f"img_{os.urandom(8).hex()}.jpg")
            with open(temp_path, "wb") as f:
                f.write(response.content)
            input_path = temp_path
            downloaded_inputs.append(input_path)
        except Exception as e:
            return None, None, None, f"Error downloading image: {str(e)}"

    # Priority 3: Uploaded file.
    elif uploaded_file is not None:
        input_path = uploaded_file.name
    else:
        return None, None, None, "Please provide an input."

    ext = os.path.splitext(input_path)[1].lower()
    video_exts = [".mp4", ".mov", ".avi", ".webm"]

    try:
        if ext in video_exts:
            # --- Video: annotate frame-by-frame and re-encode. ---
            cap = cv2.VideoCapture(input_path)
            if not cap.isOpened():
                return None, None, None, "Error opening video file."

            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps or fps <= 0:
                # Some streams report 0 fps; a 0-fps VideoWriter is unusable.
                fps = 30.0
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            output_path = os.path.join(tempfile.gettempdir(), f"out_{os.urandom(8).hex()}.mp4")
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # NOTE(review): frames are converted BGR->RGB before
                    # predict and back afterwards; Ultralytics accepts BGR
                    # numpy arrays directly, so the round trip is kept only
                    # to preserve prior behavior — confirm against the
                    # ultralytics predict docs before removing.
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    result = model.predict(source=frame_rgb, conf=sensitivity)[0]
                    annotated = result.plot()
                    out.write(cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
            finally:
                # Release even if prediction raises mid-loop, so the partial
                # output file is finalized and handles are not leaked.
                cap.release()
                out.release()

            if os.path.getsize(output_path) == 0:
                return None, None, None, "Error: Output video is empty."

            return output_path, None, output_path, "Video processed successfully!"

        # --- Image: single prediction, write the annotated copy. ---
        result = model.predict(source=input_path, conf=sensitivity)[0]
        annotated = result.plot()
        output_path = os.path.join(tempfile.gettempdir(), f"out_{os.urandom(8).hex()}.jpg")
        cv2.imwrite(output_path, annotated)
        return output_path, output_path, None, "Image processed successfully!"

    except Exception as e:
        return None, None, None, f"Processing error: {str(e)}"

    finally:
        # Delete only the temp inputs we downloaded; annotated outputs must
        # survive for Gradio to serve them.
        for path in downloaded_inputs:
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup; never mask the real result
121
 
122
+ # Gradio interface remains mostly the same
123
  with gr.Blocks(css="""
124
  .result_img > img {
125
  width: 100%;
 
128
  }
129
  """) as demo:
130
  with gr.Row():
 
131
  with gr.Column(scale=1):
132
  gr.HTML("<div style='text-align:center;'><img src='https://huggingface.co/spaces/tstone87/stance-detection/resolve/main/crowdresult.jpg' style='width:25%;'/></div>")
133
  gr.Markdown("## Pose Detection with YOLO11-pose")
 
138
  youtube_input = gr.Textbox(label="YouTube Link", placeholder="https://...")
139
  with gr.TabItem("Image URL"):
140
  image_url_input = gr.Textbox(label="Image URL", placeholder="https://...")
141
+ sensitivity_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.2,
142
+ label="Sensitivity (Confidence Threshold)")
 
143
  with gr.Column(scale=2):
144
  output_image = gr.Image(label="Annotated Output (Image)", elem_classes="result_img")
145
  output_video = gr.Video(label="Annotated Output (Video)")
 
163
  )
164
 
165
  if __name__ == "__main__":
166
+ demo.launch()