killian31 committed 8b09391 (0 parents): initial commit
.github/ISSUE_TEMPLATE/general-issue.yml ADDED
@@ -0,0 +1,75 @@
+ name: New Issue
+ description: Report an issue
+ title: "Issue Title"
+ body:
+   - type: markdown
+     attributes:
+       value: |
+         Thanks for taking the time to fill out this issue!
+   - type: textarea
+     id: what-happened
+     attributes:
+       label: What happened?
+       description: Describe your issue.
+       placeholder: Tell us what you see!
+       value: "Explain the issue here"
+     validations:
+       required: true
+   - type: textarea
+     id: expected
+     attributes:
+       label: Expected behavior
+       description: What did you expect to happen?
+       placeholder: What's the expected behavior?
+       value: "Describe the expected behavior here"
+     validations:
+       required: false
+   - type: textarea
+     id: logs
+     attributes:
+       label: Relevant log output
+       description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+       render: shell
+   - type: dropdown
+     id: os
+     attributes:
+       label: Operating System
+       description: What OS are you running?
+       options:
+         - Linux (Default)
+         - MacOS
+         - Windows
+     validations:
+       required: true
+   - type: textarea
+     id: os-version
+     attributes:
+       label: OS Version
+       description: What is the version of your OS?
+     validations:
+       required: true
+   - type: dropdown
+     id: python-version
+     attributes:
+       label: Python version
+       multiple: true
+       options:
+         - "3.6"
+         - "3.7"
+         - "3.8 (default)"
+         - "3.9"
+         - "3.10"
+         - "3.11"
+   - type: textarea
+     id: other
+     attributes:
+       label: Any other information?
+       description: Please share any other information that may be useful to us.
+   - type: checkboxes
+     id: terms
+     attributes:
+       label: Code of Conduct
+       description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/killian31/VideoBackgroundRemoval/blob/main/CODE_OF_CONDUCT.md)
+       options:
+         - label: I agree to follow this project's Code of Conduct
+           required: true
.github/workflows/github-actions-black-formatting.yml ADDED
@@ -0,0 +1,20 @@
+ name: black
+ on:
+   push:
+     branches: ["main"]
+   pull_request:
+     branches: ["main"]
+
+ jobs:
+   black:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3
+       - uses: actions/setup-python@v3
+         with:
+           python-version: "3.10"
+       - run: |
+           python -m pip install --upgrade pip
+           pip install black
+       - run: |
+           black --check --verbose .
.gitignore ADDED
@@ -0,0 +1,12 @@
+ frames/*
+ *.mov
+ *.MOV
+ *.mp4
+ *.MP4
+ *.jpg
+ *.JPG
+ __pycache__/*
+ *.txt
+ models/*
+ temp_images/*
+ .DS_Store
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,128 @@
+ # Contributor Covenant Code of Conduct
+
+ ## Our Pledge
+
+ We as members, contributors, and leaders pledge to make participation in our
+ community a harassment-free experience for everyone, regardless of age, body
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
+ identity and expression, level of experience, education, socio-economic status,
+ nationality, personal appearance, race, religion, or sexual identity
+ and orientation.
+
+ We pledge to act and interact in ways that contribute to an open, welcoming,
+ diverse, inclusive, and healthy community.
+
+ ## Our Standards
+
+ Examples of behavior that contributes to a positive environment for our
+ community include:
+
+ * Demonstrating empathy and kindness toward other people
+ * Being respectful of differing opinions, viewpoints, and experiences
+ * Giving and gracefully accepting constructive feedback
+ * Accepting responsibility and apologizing to those affected by our mistakes,
+   and learning from the experience
+ * Focusing on what is best not just for us as individuals, but for the
+   overall community
+
+ Examples of unacceptable behavior include:
+
+ * The use of sexualized language or imagery, and sexual attention or
+   advances of any kind
+ * Trolling, insulting or derogatory comments, and personal or political attacks
+ * Public or private harassment
+ * Publishing others' private information, such as a physical or email
+   address, without their explicit permission
+ * Other conduct which could reasonably be considered inappropriate in a
+   professional setting
+
+ ## Enforcement Responsibilities
+
+ Community leaders are responsible for clarifying and enforcing our standards of
+ acceptable behavior and will take appropriate and fair corrective action in
+ response to any behavior that they deem inappropriate, threatening, offensive,
+ or harmful.
+
+ Community leaders have the right and responsibility to remove, edit, or reject
+ comments, commits, code, wiki edits, issues, and other contributions that are
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
+ decisions when appropriate.
+
+ ## Scope
+
+ This Code of Conduct applies within all community spaces, and also applies when
+ an individual is officially representing the community in public spaces.
+ Examples of representing our community include using an official e-mail address,
+ posting via an official social media account, or acting as an appointed
+ representative at an online or offline event.
+
+ ## Enforcement
+
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
+ reported to the community leaders responsible for enforcement at
+ killian31.
+ All complaints will be reviewed and investigated promptly and fairly.
+
+ All community leaders are obligated to respect the privacy and security of the
+ reporter of any incident.
+
+ ## Enforcement Guidelines
+
+ Community leaders will follow these Community Impact Guidelines in determining
+ the consequences for any action they deem in violation of this Code of Conduct:
+
+ ### 1. Correction
+
+ **Community Impact**: Use of inappropriate language or other behavior deemed
+ unprofessional or unwelcome in the community.
+
+ **Consequence**: A private, written warning from community leaders, providing
+ clarity around the nature of the violation and an explanation of why the
+ behavior was inappropriate. A public apology may be requested.
+
+ ### 2. Warning
+
+ **Community Impact**: A violation through a single incident or series
+ of actions.
+
+ **Consequence**: A warning with consequences for continued behavior. No
+ interaction with the people involved, including unsolicited interaction with
+ those enforcing the Code of Conduct, for a specified period of time. This
+ includes avoiding interactions in community spaces as well as external channels
+ like social media. Violating these terms may lead to a temporary or
+ permanent ban.
+
+ ### 3. Temporary Ban
+
+ **Community Impact**: A serious violation of community standards, including
+ sustained inappropriate behavior.
+
+ **Consequence**: A temporary ban from any sort of interaction or public
+ communication with the community for a specified period of time. No public or
+ private interaction with the people involved, including unsolicited interaction
+ with those enforcing the Code of Conduct, is allowed during this period.
+ Violating these terms may lead to a permanent ban.
+
+ ### 4. Permanent Ban
+
+ **Community Impact**: Demonstrating a pattern of violation of community
+ standards, including sustained inappropriate behavior, harassment of an
+ individual, or aggression toward or disparagement of classes of individuals.
+
+ **Consequence**: A permanent ban from any sort of public interaction within
+ the community.
+
+ ## Attribution
+
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+ version 2.0, available at
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
+ enforcement ladder](https://github.com/mozilla/diversity).
+
+ [homepage]: https://www.contributor-covenant.org
+
+ For answers to common questions about this code of conduct, see the FAQ at
+ https://www.contributor-covenant.org/faq. Translations are available at
+ https://www.contributor-covenant.org/translations.
LICENSE ADDED
@@ -0,0 +1,201 @@
+                                  Apache License
+                            Version 2.0, January 2004
+                         http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+       "License" shall mean the terms and conditions for use, reproduction,
+       and distribution as defined by Sections 1 through 9 of this document.
+
+       "Licensor" shall mean the copyright owner or entity authorized by
+       the copyright owner that is granting the License.
+
+       "Legal Entity" shall mean the union of the acting entity and all
+       other entities that control, are controlled by, or are under common
+       control with that entity. For the purposes of this definition,
+       "control" means (i) the power, direct or indirect, to cause the
+       direction or management of such entity, whether by contract or
+       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+       outstanding shares, or (iii) beneficial ownership of such entity.
+
+       "You" (or "Your") shall mean an individual or Legal Entity
+       exercising permissions granted by this License.
+
+       "Source" form shall mean the preferred form for making modifications,
+       including but not limited to software source code, documentation
+       source, and configuration files.
+
+       "Object" form shall mean any form resulting from mechanical
+       transformation or translation of a Source form, including but
+       not limited to compiled object code, generated documentation,
+       and conversions to other media types.
+
+       "Work" shall mean the work of authorship, whether in Source or
+       Object form, made available under the License, as indicated by a
+       copyright notice that is included in or attached to the work
+       (an example is provided in the Appendix below).
+
+       "Derivative Works" shall mean any work, whether in Source or Object
+       form, that is based on (or derived from) the Work and for which the
+       editorial revisions, annotations, elaborations, or other modifications
+       represent, as a whole, an original work of authorship. For the purposes
+       of this License, Derivative Works shall not include works that remain
+       separable from, or merely link (or bind by name) to the interfaces of,
+       the Work and Derivative Works thereof.
+
+       "Contribution" shall mean any work of authorship, including
+       the original version of the Work and any modifications or additions
+       to that Work or Derivative Works thereof, that is intentionally
+       submitted to Licensor for inclusion in the Work by the copyright owner
+       or by an individual or Legal Entity authorized to submit on behalf of
+       the copyright owner. For the purposes of this definition, "submitted"
+       means any form of electronic, verbal, or written communication sent
+       to the Licensor or its representatives, including but not limited to
+       communication on electronic mailing lists, source code control systems,
+       and issue tracking systems that are managed by, or on behalf of, the
+       Licensor for the purpose of discussing and improving the Work, but
+       excluding communication that is conspicuously marked or otherwise
+       designated in writing by the copyright owner as "Not a Contribution."
+
+       "Contributor" shall mean Licensor and any individual or Legal Entity
+       on behalf of whom a Contribution has been received by Licensor and
+       subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       copyright license to reproduce, prepare Derivative Works of,
+       publicly display, publicly perform, sublicense, and distribute the
+       Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       (except as stated in this section) patent license to make, have made,
+       use, offer to sell, sell, import, and otherwise transfer the Work,
+       where such license applies only to those patent claims licensable
+       by such Contributor that are necessarily infringed by their
+       Contribution(s) alone or by combination of their Contribution(s)
+       with the Work to which such Contribution(s) was submitted. If You
+       institute patent litigation against any entity (including a
+       cross-claim or counterclaim in a lawsuit) alleging that the Work
+       or a Contribution incorporated within the Work constitutes direct
+       or contributory patent infringement, then any patent licenses
+       granted to You under this License for that Work shall terminate
+       as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+       Work or Derivative Works thereof in any medium, with or without
+       modifications, and in Source or Object form, provided that You
+       meet the following conditions:
+
+       (a) You must give any other recipients of the Work or
+           Derivative Works a copy of this License; and
+
+       (b) You must cause any modified files to carry prominent notices
+           stating that You changed the files; and
+
+       (c) You must retain, in the Source form of any Derivative Works
+           that You distribute, all copyright, patent, trademark, and
+           attribution notices from the Source form of the Work,
+           excluding those notices that do not pertain to any part of
+           the Derivative Works; and
+
+       (d) If the Work includes a "NOTICE" text file as part of its
+           distribution, then any Derivative Works that You distribute must
+           include a readable copy of the attribution notices contained
+           within such NOTICE file, excluding those notices that do not
+           pertain to any part of the Derivative Works, in at least one
+           of the following places: within a NOTICE text file distributed
+           as part of the Derivative Works; within the Source form or
+           documentation, if provided along with the Derivative Works; or,
+           within a display generated by the Derivative Works, if and
+           wherever such third-party notices normally appear. The contents
+           of the NOTICE file are for informational purposes only and
+           do not modify the License. You may add Your own attribution
+           notices within Derivative Works that You distribute, alongside
+           or as an addendum to the NOTICE text from the Work, provided
+           that such additional attribution notices cannot be construed
+           as modifying the License.
+
+       You may add Your own copyright statement to Your modifications and
+       may provide additional or different license terms and conditions
+       for use, reproduction, or distribution of Your modifications, or
+       for any such Derivative Works as a whole, provided Your use,
+       reproduction, and distribution of the Work otherwise complies with
+       the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+       any Contribution intentionally submitted for inclusion in the Work
+       by You to the Licensor shall be under the terms and conditions of
+       this License, without any additional terms or conditions.
+       Notwithstanding the above, nothing herein shall supersede or modify
+       the terms of any separate license agreement you may have executed
+       with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+       names, trademarks, service marks, or product names of the Licensor,
+       except as required for reasonable and customary use in describing the
+       origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+       agreed to in writing, Licensor provides the Work (and each
+       Contributor provides its Contributions) on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+       implied, including, without limitation, any warranties or conditions
+       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+       PARTICULAR PURPOSE. You are solely responsible for determining the
+       appropriateness of using or redistributing the Work and assume any
+       risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall any Contributor be
+       liable to You for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as a
+       result of this License or out of the use or inability to use the
+       Work (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if such Contributor
+       has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+       the Work or Derivative Works thereof, You may choose to offer,
+       and charge a fee for, acceptance of support, warranty, indemnity,
+       or other liability obligations and/or rights consistent with this
+       License. However, in accepting such obligations, You may act only
+       on Your own behalf and on Your sole responsibility, not on behalf
+       of any other Contributor, and only if You agree to indemnify,
+       defend, and hold each Contributor harmless for any liability
+       incurred by, or claims asserted against, such Contributor by reason
+       of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+       To apply the Apache License to your work, attach the following
+       boilerplate notice, with the fields enclosed by brackets "[]"
+       replaced with your own identifying information. (Don't include
+       the brackets!) The text should be enclosed in the appropriate
+       comment syntax for the file format. We also recommend that a
+       file or class name and description of purpose be included on the
+       same "printed page" as the copyright notice for easier
+       identification within third-party archives.
+
+    Copyright 2023 killian31
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
README.md ADDED
@@ -0,0 +1,115 @@
+ # AI Powered Video Background Removal Tool
+
+ [![GitHub stars](https://img.shields.io/github/stars/killian31/VideoBackgroundRemoval.svg)](https://github.com/killian31/VideoBackgroundRemoval/stargazers)
+ [![black](https://github.com/killian31/VideoBackgroundRemoval/actions/workflows/github-actions-black-formatting.yml/badge.svg)](https://github.com/killian31/VideoBackgroundRemoval/actions/workflows/github-actions-black-formatting.yml)
+ [![wakatime](https://wakatime.com/badge/github/killian31/VideoBackgroundRemoval.svg)](https://wakatime.com/badge/github/killian31/VideoBackgroundRemoval)
+
+ The Video Background Removal Tool removes the background from a video after you select the subject in a single frame. It is optimized to run on CPUs and has a user-friendly interface, making it well suited to a wide range of users, especially online content creators such as YouTubers.
+
+ <p align="center">
+   <table>
+     <tr>
+       <td>
+         <img src="assets/example.gif" width="385" height="216" />
+       </td>
+       <td>
+         <img src="assets/output_example.gif" width="385" height="216" />
+       </td>
+     </tr>
+   </table>
+ </p>
+
+ ## Contents
+
+ - [Installation](#installation)
+ - [Usage](#usage)
+ - [Example](#example)
+ - [Contribution](#how-to-contribute)
+
+ ## Installation
+
+ ### With pyenv and poetry
+
+ ```bash
+ git clone https://github.com/killian31/VideoBackgroundRemoval.git
+ cd VideoBackgroundRemoval
+ pyenv virtualenv 3.11.9 vbr
+ pyenv activate vbr
+ pip install poetry
+ poetry install
+ ```
+
+ ## Usage
+
+ ### Using the Streamlit app (locally)
+
+ Run `streamlit run app.py` to launch the Streamlit app. Then upload a video, use the
+ sliders to draw a bounding box around the subject whose background you want to remove,
+ and click Segment Video.
+
+ ### Command line
+
+ ```bash
+ usage: main.py [-h] [--video_filename VIDEO_FILENAME] [--dir_frames DIR_FRAMES] [--image_start IMAGE_START] [--image_end IMAGE_END] [--bbox_file BBOX_FILE] [--skip_vid2im]
+                [--mobile_sam_weights MOBILE_SAM_WEIGHTS] [--tracker_name {yolov7,yoloS}] [--output_dir OUTPUT_DIR] [--output_video OUTPUT_VIDEO] [--auto_detect]
+                [--background_color BACKGROUND_COLOR]
+
+ options:
+   -h, --help            show this help message and exit
+   --video_filename VIDEO_FILENAME
+                         path to the video
+   --dir_frames DIR_FRAMES
+                         path to the directory in which all input frames will be stored
+   --image_start IMAGE_START
+                         first image to be stored
+   --image_end IMAGE_END
+                         last image to be stored, last one if 0
+   --bbox_file BBOX_FILE
+                         path to the bounding box text file
+   --skip_vid2im         skip writing the video frames as images
+   --mobile_sam_weights MOBILE_SAM_WEIGHTS
+                         path to MobileSAM weights
+   --tracker_name {yolov7,yoloS}
+                         tracker name
+   --output_dir OUTPUT_DIR
+                         directory to store the output frames
+   --output_video OUTPUT_VIDEO
+                         path to store the output video
+   --auto_detect         segment automatically, without a user-provided bounding box
+   --background_color BACKGROUND_COLOR
+                         background color for the output (hex)
+ ```
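+
+ The same pipeline can also be driven from Python via the `segment_video` function in `main.py`. A minimal sketch (the paths mirror the command-line example below and are placeholders):
+
+ ```python
+ from main import segment_video
+
+ segment_video(
+     video_filename="assets/example.mp4",
+     dir_frames="frames",
+     image_start=0,
+     image_end=0,  # 0 means "until the last frame"
+     bbox_file="bbox.txt",
+     skip_vid2im=False,
+     mobile_sam_weights="models/mobile_sam.pt",
+     output_dir="output_frames",
+     output_video="output.mp4",
+     pbar=True,
+ )
+ ```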
+
+ ## Example
+
+ The following command is a working example using a video stored in the repo:
+
+ ```bash
+ python3 main.py --video_filename assets/example.mp4 --dir_frames ./frames --bbox_file bbox.txt --mobile_sam_weights models/mobile_sam.pt --output_dir output_frames --output_video output.mp4
+ ```
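+
+ Here `bbox.txt` is a plain text file holding four space-separated integers, `xmin ymin xmax ymax` (the format `app.py` writes when you save a bounding box). A sketch with hypothetical coordinates:
+
+ ```python
+ # Hypothetical box around the subject; adjust the coordinates to your video.
+ with open("bbox.txt", "w") as f:
+     f.write("100 50 400 300")
+ ```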
+
+ ## How to Contribute
+
+ We welcome contributions from the community! To ensure a consistent code style, we ask contributors to follow these guidelines:
+
+ ### Code Format
+
+ Please format your code using the `black` code formatter.
+
+ #### Installation
+
+ ```bash
+ pip install black
+ ```
+
+ #### Usage
+
+ To format your code:
+
+ ```bash
+ black .
+ ```
+
+ This setup will help maintain a consistent coding style throughout the project.
app.py ADDED
@@ -0,0 +1,151 @@
+ import os
+ import warnings
+
+ import cv2
+ import streamlit as st
+ from PIL import Image, ImageDraw
+
+ import redirect as rd
+ from main import segment_video
+
+ warnings.filterwarnings("ignore")
+
+
+ def load_image(image_path):
+     return Image.open(image_path)
+
+
+ def extract_first_frame(video_path, output_image_path):
+     """
+     Extract the first frame from a video file and save it to disk.
+
+     Parameters:
+     video_path (str): Path to the video file.
+     output_image_path (str): Path to save the extracted frame.
+
+     Returns:
+     str: Path to the saved frame.
+     """
+     cap = cv2.VideoCapture(video_path)
+     if not cap.isOpened():
+         raise ValueError(f"Error: Unable to open video file: {video_path}")
+     ret, frame = cap.read()
+     cap.release()
+     if not ret:
+         raise ValueError("Error: Unable to read the first frame from the video.")
+     cv2.imwrite(output_image_path, frame)
+
+     return output_image_path
+
+
+ st.title("Video Background Removal")
+
+ st.write(
+     "This app uses the Mobile-SAM model to remove the background from a video. "
+     "The model is based on the paper [Faster Segment Anything: Towards Lightweight SAM for Mobile Applications](https://arxiv.org/abs/2306.14289)."
+ )
+ st.write(
+     "How to use: Upload a video and click 'Segment Video'. The app will then process the video and remove the background. "
+     "You can also use a bounding box to specify the area to segment. "
+     "The app will then output the segmented video, which you can download. "
+     "Do not hesitate to hit the 'Stop/Reset' button if you encounter any issues (it usually solves them all) or want to start over."
+ )
+
+
+ video_file = st.file_uploader("Upload a video", type=["mp4", "avi", "mov"])
+
+ if video_file is not None:
+     st.video(video_file)
+     with open("temp_video.mp4", "wb") as f:
+         f.write(video_file.getbuffer())
+
+     if not os.path.exists("./temp_images"):
+         os.makedirs("./temp_images")
+     frame_path = extract_first_frame("temp_video.mp4", "temp_frame.jpg")
+
+     use_bbox = st.checkbox("Use bounding box", value=False)
+     background_color = st.color_picker("Background keying color", "#009000")
+
+     initial_frame = load_image(frame_path)
+     original_width, original_height = initial_frame.width, initial_frame.height
+     if use_bbox:
+         col1, col2 = st.columns(2)
+
+         with col1:
+             xmin = st.slider("xmin", 0, original_width, original_width // 4)
+             ymin = st.slider("ymin", 0, original_height, original_height // 4)
+         with col2:
+             xmax = st.slider("xmax", 0, original_width, original_width // 2)
+             ymax = st.slider("ymax", 0, original_height, original_height // 2)
+
+         draw = ImageDraw.Draw(initial_frame)
+         draw.rectangle([xmin, ymin, xmax, ymax], outline="red", width=3)
+         st.image(initial_frame, caption="Bounding Box Preview", use_column_width=True)
+         if st.button("Save Bounding Box"):
+             with open("temp_bbox.txt", "w") as bbox_file:
+                 bbox_file.write(f"{xmin} {ymin} {xmax} {ymax}")
+             st.write(f"Bounding box saved to {os.path.abspath('temp_bbox.txt')}")
+
+     col1, col2 = st.columns(2)
+     with col2:
+         if st.button(
+             "Stop/Reset",
+             key="stop",
+             help="Stop the process and reset the app",
+             type="primary",
+         ):
+             st.write("Stopping...")
+             os.system("rm -r ./temp_images")
+             os.system("rm ./temp_bbox.txt")
+             os.system("rm -r ./temp_processed_images")
+             os.system("rm ./temp_video.mp4")
+             os.system("rm ./temp_frame.jpg")
+             st.write("Process interrupted")
+
+     with col1:
+         if st.button(
+             "Segment Video", key="segment", help="Segment the video", type="secondary"
+         ):
+             if use_bbox:
+                 if not os.path.exists("./temp_bbox.txt"):
+                     with open("temp_bbox.txt", "w") as bbox_file:
+                         bbox_file.write(f"{xmin} {ymin} {xmax} {ymax}")
+             else:
+                 # No user box: cover the whole frame and reverse the mask below.
+                 with open("temp_bbox.txt", "w") as bbox_file:
+                     bbox_file.write(f"0 0 {original_width} {original_height}")
+
+             st.write("Segmenting video...")
+             so = st.empty()
+             with rd.stdouterr(to=st.sidebar):
+                 segment_video(
+                     video_filename="temp_video.mp4",
+                     dir_frames="temp_images",
+                     image_start=0,
+                     image_end=0,
+                     bbox_file="temp_bbox.txt",
+                     skip_vid2im=False,
+                     mobile_sam_weights="./models/mobile_sam.pt",
+                     auto_detect=not use_bbox,
+                     background_color=background_color,
+                     output_video="video_segmented.mp4",
+                     output_dir="temp_processed_images",
+                     pbar=False,
+                     reverse_mask=not use_bbox,
+                 )
+
+             os.system("rm -rf ./temp_images")
+             os.system("rm -rf ./temp_bbox.txt")
+             os.system("rm -rf ./temp_processed_images")
+             os.system("rm -rf ./temp_video.mp4")
+
+             st.video("./video_segmented.mp4")
+             st.write(f"Video saved to {os.path.abspath('video_segmented.mp4')}")
+
+             vid_file = open("video_segmented.mp4", "rb")
+             vid_bytes = vid_file.read()
+             st.download_button(
+                 label="Download Segmented Video",
+                 data=vid_bytes,
+                 file_name="video_segmented.mp4",
+             )
+             vid_file.close()
images_to_video.py ADDED
@@ -0,0 +1,52 @@
+ import os
+
+ import cv2
+ from tqdm import tqdm
+
+
+ class VideoCreator:
+     def __init__(self, imgs_dir, vid_name, pbar=True):
+         """
+         :param str imgs_dir: The directory where the image files are stored.
+         :param str vid_name: The filename of the output video.
+         :param bool pbar: Whether to display a progress bar.
+         """
+
+         self.imgs_dir = imgs_dir
+         self.img_array = []
+         self.video_filename = vid_name
+         self.pbar = pbar
+
+     def preprocess_images(self):
+         filenames = sorted(os.listdir(self.imgs_dir))
+         print("Adding images...")
+         if self.pbar:
+             pb = tqdm(filenames)
+         else:
+             pb = filenames
+
+         height, width, _ = cv2.imread(self.imgs_dir + "/" + filenames[0]).shape
+         size = (width, height)
+         for filename in pb:
+             complete_filename = self.imgs_dir + "/" + filename
+             img = cv2.imread(complete_filename)
+             # The frames were saved in RGB order, so swap the channels back to
+             # BGR, the order cv2.VideoWriter expects.
+             img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+             self.img_array.append(img)
+
+         return size
+
+     def create_video(self, fps=20):
+         size = self.preprocess_images()
+         out = cv2.VideoWriter(
+             self.video_filename, cv2.VideoWriter_fourcc(*"MJPG"), fps, size
+         )
+         print("Recording video...")
+         if self.pbar:
+             pb = tqdm(range(len(self.img_array)))
+         else:
+             pb = range(len(self.img_array))
+         for i in pb:
+             out.write(self.img_array[i])
+         out.release()
+         print("Done.")
main.py ADDED
@@ -0,0 +1,302 @@
+ import argparse
+ import os
+ import time
+
+ import cv2
+ import numpy as np
+ import requests
+ import torch
+ import wget
+ import yolov7
+ from mobile_sam import SamPredictor, sam_model_registry
+ from PIL import Image
+ from tqdm import tqdm
+ from transformers import YolosForObjectDetection, YolosImageProcessor
+
+ from images_to_video import VideoCreator
+ from video_to_images import ImageCreator
+
+
+ def download_mobile_sam_weight(path):
+     if not os.path.exists(path):
+         sam_weights = "https://raw.githubusercontent.com/ChaoningZhang/MobileSAM/master/weights/mobile_sam.pt"
+         # Create the intermediate directories of `path` if they do not exist.
+         for i in range(2, len(path.split("/"))):
+             temp = path.split("/")[:i]
+             cur_path = "/".join(temp)
+             if not os.path.isdir(cur_path):
+                 os.mkdir(cur_path)
+         model_name = path.split("/")[-1]
+         if model_name in sam_weights:
+             wget.download(sam_weights, path)
+         else:
+             raise NameError(
+                 "There is no pretrained weight to download for %s, you need to provide a path to MobileSAM weights."
+                 % model_name
+             )
+
+
+ def get_closest_bbox(bbox_list, bbox_target):
+     """
+     Given a list of bounding boxes, find the one that is closest to the target bounding box.
+
+     Args:
+         bbox_list: list of bounding boxes
+         bbox_target: target bounding box
+     Returns:
+         closest bounding box
+     """
+     min_dist = 100000000
+     min_idx = 0
+     for idx, bbox in enumerate(bbox_list):
+         dist = np.linalg.norm(bbox - bbox_target)
+         if dist < min_dist:
+             min_dist = dist
+             min_idx = idx
+     return bbox_list[min_idx]
+
+
+ def get_bboxes(image_file, image, model, image_processor, threshold=0.9):
+     if image_processor is None:
+         # yolov7 path: the model is called directly on the image file.
+         results = model(image_file)
+         predictions = results.pred[0]
+         boxes = predictions[:, :4].detach().numpy()
+         return boxes
+     else:
+         # YOLOS path: run the HF image processor and post-process detections.
+         inputs = image_processor(images=image, return_tensors="pt")
+         outputs = model(**inputs)
+
+         target_sizes = torch.tensor([image.size[::-1]])
+         results = image_processor.post_process_object_detection(
+             outputs, threshold=threshold, target_sizes=target_sizes
+         )[0]
+
+         return results["boxes"].detach().numpy()
+
+
+ def segment_video(
+     video_filename,
+     dir_frames,
+     image_start,
+     image_end,
+     bbox_file,
+     skip_vid2im,
+     mobile_sam_weights,
+     auto_detect=False,
+     tracker_name="yolov7",
+     background_color="#009000",
+     output_dir="output_frames",
+     output_video="output.mp4",
+     pbar=False,
+     reverse_mask=False,
+ ):
+     if not skip_vid2im:
+         vid_to_im = ImageCreator(
+             video_filename,
+             dir_frames,
+             image_start=image_start,
+             image_end=image_end,
+             pbar=pbar,
+         )
+         vid_to_im.get_images()
+     # Get fps of video
+     vid = cv2.VideoCapture(video_filename)
+     fps = vid.get(cv2.CAP_PROP_FPS)
+     vid.release()
+     # Convert the hex color (e.g. "#009000") to an RGB triple in [0, 1].
+     background_color = background_color.lstrip("#")
+     background_color = (
+         np.array([int(background_color[i : i + 2], 16) for i in (0, 2, 4)]) / 255.0
+     )
+
+     with open(bbox_file, "r") as f:
+         bbox_orig = [int(coord) for coord in f.read().split(" ")]
+     download_mobile_sam_weight(mobile_sam_weights)
+     if image_end == 0:
+         frames = sorted(os.listdir(dir_frames))[image_start:]
+     else:
+         frames = sorted(os.listdir(dir_frames))[image_start:image_end]
+
+     model_type = "vit_t"
+
+     if torch.backends.mps.is_available():
+         device = "mps"
+     elif torch.cuda.is_available():
+         device = "cuda"
+     else:
+         device = "cpu"
+     sam = sam_model_registry[model_type](checkpoint=mobile_sam_weights)
+     sam.to(device=device)
+     sam.eval()
+
+     predictor = SamPredictor(sam)
+
+     if not auto_detect:
+         if tracker_name == "yolov7":
+             model = yolov7.load("kadirnar/yolov7-tiny-v0.1", hf_model=True)
+             model.conf = 0.25  # NMS confidence threshold
+             model.iou = 0.45  # NMS IoU threshold
+             model.classes = None
+             image_processor = None
+         else:
+             model = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")
+             image_processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")
+
+     output_frames = []
+
+     if pbar:
+         pb = tqdm(frames)
+     else:
+         pb = frames
+
+     processed_frames = 0
+     init_time = time.time()
+     for frame in pb:
+         processed_frames += 1
+         image_file = dir_frames + "/" + frame
+         image_pil = Image.open(image_file)
+         image_np = np.array(image_pil)
+         if not auto_detect:
+             # Track the subject: keep the detection closest to the original box.
+             bboxes = get_bboxes(image_file, image_pil, model, image_processor)
+             closest_bbox = get_closest_bbox(bboxes, bbox_orig)
+             input_box = np.array(closest_bbox)
+         else:
+             # No tracking: prompt SAM with the whole frame.
+             input_box = np.array([0, 0, image_np.shape[1], image_np.shape[0]])
+         predictor.set_image(image_np)
+         masks, _, _ = predictor.predict(
+             point_coords=None,
+             point_labels=None,
+             box=input_box[None, :],
+             multimask_output=True,
+         )
+         if reverse_mask:
+             # Paint the masked region with the background color, keep the rest.
+             mask = masks[0]
+             h, w = mask.shape[-2:]
+             mask_image = (
+                 (mask).reshape(h, w, 1) * background_color.reshape(1, 1, -1)
+             ) * 255
+             masked_image = image_np * (1 - mask).reshape(h, w, 1)
+             masked_image = masked_image + mask_image
+             output_frames.append(masked_image)
+         else:
+             # Keep the masked region, paint everything else with the background color.
+             mask = masks[0]
+             h, w = mask.shape[-2:]
+             mask_image = (
+                 (1 - mask).reshape(h, w, 1) * background_color.reshape(1, 1, -1)
+             ) * 255
+             masked_image = image_np * mask.reshape(h, w, 1)
+             masked_image = masked_image + mask_image
+             output_frames.append(masked_image)
+
+         if not pbar and processed_frames % 10 == 0:
+             remaining_time = (
+                 (time.time() - init_time)
+                 / processed_frames
+                 * (len(frames) - processed_frames)
+             )
+             remaining_time = int(remaining_time)
+             remaining_time_str = f"{remaining_time//60}m {remaining_time%60}s"
+             print(
+                 f"Processed frame {processed_frames}/{len(frames)} - Remaining time: {remaining_time_str}"
+             )
+     if not os.path.exists(output_dir):
+         os.mkdir(output_dir)
+
+     zfill_max = len(str(len(output_frames)))
+     for idx, frame in enumerate(output_frames):
+         cv2.imwrite(
+             f"{output_dir}/frame_{str(idx).zfill(zfill_max)}.png",
+             frame,
+         )
+     vid_creator = VideoCreator(output_dir, output_video, pbar=pbar)
+     vid_creator.create_video(fps=int(fps))
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--video_filename",
+         default="assets/example.mp4",
+         type=str,
+         help="path to the video",
+     )
+     parser.add_argument(
+         "--dir_frames",
+         type=str,
+         default="frames",
+         help="path to the directory in which all input frames will be stored",
+     )
+     parser.add_argument(
+         "--image_start", type=int, default=0, help="first image to be stored"
+     )
+     parser.add_argument(
+         "--image_end",
+         type=int,
+         default=0,
+         help="last image to be stored, last one if 0",
+     )
+     parser.add_argument(
+         "--bbox_file",
+         type=str,
+         default="bbox.txt",
+         help="path to the bounding box text file",
+     )
+     parser.add_argument(
+         "--skip_vid2im",
+         action="store_true",
+         help="skip writing the video frames as images",
+     )
+     parser.add_argument(
+         "--mobile_sam_weights",
+         type=str,
+         default="./models/mobile_sam.pt",
+         help="path to MobileSAM weights",
+     )
+
+     parser.add_argument(
+         "--tracker_name",
+         type=str,
+         default="yolov7",
+         help="tracker name",
+         choices=["yolov7", "yoloS"],
+     )
+
+     parser.add_argument(
+         "--output_dir",
+         type=str,
+         default="output_frames",
+         help="directory to store the output frames",
+     )
+
+     parser.add_argument(
+         "--output_video",
+         type=str,
+         default="output.mp4",
+         help="path to store the output video",
+     )
+     parser.add_argument(
+         "--auto_detect",
+         action="store_true",
+         help="segment automatically, without a user-provided bounding box",
+     )
+     parser.add_argument(
+         "--background_color",
+         type=str,
+         default="#009000",
+         help="background color for the output (hex)",
+     )
+     args = parser.parse_args()
+
+     # Keyword arguments fix a bug: the original positional call passed
+     # output_dir/output_video into tracker_name/background_color (and vice
+     # versa), since segment_video declares its optional parameters in a
+     # different order.
+     segment_video(
+         args.video_filename,
+         args.dir_frames,
+         args.image_start,
+         args.image_end,
+         args.bbox_file,
+         args.skip_vid2im,
+         args.mobile_sam_weights,
+         auto_detect=args.auto_detect,
+         tracker_name=args.tracker_name,
+         background_color=args.background_color,
+         output_dir=args.output_dir,
+         output_video=args.output_video,
+     )
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
pyproject.toml ADDED
@@ -0,0 +1,34 @@
+ [project]
+ name = "vbr"
+ version = "1.0.0"
+ description = "Automatic background removal from an input video and a single user subject selection."
+ authors = [
+     {name = "killian31", email = "[email protected]"}
+ ]
+ license = {text = "Apache 2.0"}
+ readme = "README.md"
+ requires-python = ">=3.11"
+ dependencies = [
+     "numpy (==1.26.4)",
+     "opencv-python (==4.9.0.80)",
+     "opencv-python-headless (>=4.9.0.80,<4.10.0.0)",
+     "pillow (>=10.2.0,<10.3.0)",
+     "requests (>=2.31.0,<2.32.0)",
+     "streamlit (>=1.31.0,<1.32.0)",
+     "timm (>=0.9.12,<0.10.0)",
+     "torch (==2.2.2)",
+     "tqdm (>=4.66.1,<4.67.0)",
+     "transformers (>=4.37.2,<4.38.0)",
+     "wget (>=3.2,<4.0)",
+     "yolov7detect (>=1.0.1,<1.1.0)",
+     "mobile-sam @ git+https://github.com/ChaoningZhang/MobileSAM.git",
+     "huggingface-hub (==0.24.7)"
+ ]
+
+ [build-system]
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
+ build-backend = "poetry.core.masonry.api"
+
+ [tool.poetry]
+ package-mode = false
redirect.py ADDED
@@ -0,0 +1,201 @@
+ import contextlib
+ import io
+ import re
+ import sys
+ import threading
+
+ import streamlit as st
+
+
+ class _Redirect:
+     class IOStuff(io.StringIO):
+         def __init__(
+             self, trigger, max_buffer, buffer_separator, regex, dup, need_dup, on_thread
+         ):
+             super().__init__()
+             self._trigger = trigger
+             self._max_buffer = max_buffer
+             self._buffer_separator = buffer_separator
+             self._regex = regex and re.compile(regex)
+             self._dup = dup
+             self._need_dup = need_dup
+             self._on_thread = on_thread
+
+         def write(self, __s: str) -> int:
+             res = None
+             if self._on_thread == threading.get_ident():
+                 if self._max_buffer:
+                     concatenated_len = super().tell() + len(__s)
+                     if concatenated_len > self._max_buffer:
+                         rest = self.get_filtered_output()[
+                             concatenated_len - self._max_buffer :
+                         ]
+                         if self._buffer_separator is not None:
+                             rest = rest.split(self._buffer_separator, 1)[-1]
+                         super().seek(0)
+                         super().write(rest)
+                         super().truncate(super().tell() + len(__s))
+                 res = super().write(__s)
+                 self._trigger(self.get_filtered_output())
+             if self._on_thread != threading.get_ident() or self._need_dup:
+                 self._dup.write(__s)
+             return res
+
+         def get_filtered_output(self):
+             if self._regex is None or self._buffer_separator is None:
+                 return self.getvalue()
+
+             return self._buffer_separator.join(
+                 filter(
+                     self._regex.search, self.getvalue().split(self._buffer_separator)
+                 )
+             )
+
+         def print_at_end(self):
+             self._trigger(self.get_filtered_output())
+
+     def __init__(
+         self,
+         stdout=None,
+         stderr=False,
+         format=None,
+         to=None,
+         max_buffer=None,
+         buffer_separator="\n",
+         regex=None,
+         duplicate_out=False,
+     ):
+         self.io_args = {
+             "trigger": self._write,
+             "max_buffer": max_buffer,
+             "buffer_separator": buffer_separator,
+             "regex": regex,
+             "on_thread": threading.get_ident(),
+         }
+         self.redirections = []
+         self.st = None
+         self.stderr = stderr is True
+         self.stdout = stdout is True or (stdout is None and not self.stderr)
+         self.format = format or "code"
+         self.to = to
+         self.fun = None
+         self.duplicate_out = duplicate_out or None
+         self.active_nested = None
+
+         if not self.stdout and not self.stderr:
+             raise ValueError("one of stdout or stderr must be True")
+
+         if self.format not in ["text", "markdown", "latex", "code", "write"]:
+             raise ValueError(
+                 f"format needs to be one of the following: {', '.join(['text', 'markdown', 'latex', 'code', 'write'])}"
+             )
+
+         if self.to and (not hasattr(self.to, "text") or not hasattr(self.to, "empty")):
+             raise ValueError("'to' is not a streamlit container object")
+
+     def __enter__(self):
+         if self.st is not None:
+             if self.to is None:
+                 if self.active_nested is None:
+                     self.active_nested = self(
+                         format=self.format,
+                         max_buffer=self.io_args["max_buffer"],
+                         buffer_separator=self.io_args["buffer_separator"],
+                         regex=self.io_args["regex"],
+                         duplicate_out=self.duplicate_out,
+                     )
+                 return self.active_nested.__enter__()
+             else:
+                 raise Exception("Already entered")
+         to = self.to or st
+
+         to.text("Logs:")
+         self.st = to.empty()
+         self.fun = getattr(self.st, self.format)
+
+         io_obj = None
+
+         def redirect(to_duplicate, context_redirect):
+             nonlocal io_obj
+             io_obj = _Redirect.IOStuff(
+                 need_dup=self.duplicate_out and True, dup=to_duplicate, **self.io_args
+             )
+             redirection = context_redirect(io_obj)
+             self.redirections.append((redirection, io_obj))
+             redirection.__enter__()
+
+         if self.stderr:
+             redirect(sys.stderr, contextlib.redirect_stderr)
+         if self.stdout:
+             redirect(sys.stdout, contextlib.redirect_stdout)
+
+         return io_obj
+
+     def __call__(
+         self,
+         to=None,
+         format=None,
+         max_buffer=None,
+         buffer_separator="\n",
+         regex=None,
+         duplicate_out=False,
+     ):
+         return _Redirect(
+             self.stdout,
+             self.stderr,
+             format=format,
+             to=to,
+             max_buffer=max_buffer,
+             buffer_separator=buffer_separator,
+             regex=regex,
+             duplicate_out=duplicate_out,
+         )
+
+     def __exit__(self, *exc):
+         if self.active_nested is not None:
+             nested = self.active_nested
+             if nested.active_nested is None:
+                 self.active_nested = None
+             return nested.__exit__(*exc)
+
+         res = None
+         for redirection, io_obj in reversed(self.redirections):
+             res = redirection.__exit__(*exc)
+             io_obj.print_at_end()
+
+         self.redirections = []
+         self.st = None
+         self.fun = None
+         return res
+
+     def _write(self, data):
+         self.fun(data)
+
+
+ stdout = _Redirect()
+ stderr = _Redirect(stderr=True)
+ stdouterr = _Redirect(stdout=True, stderr=True)
+
+ """
+ # can be used as
+
+ import time
+ import sys
+ from random import getrandbits
+ import streamlit.redirect as rd
+
+ st.text('Suboutput:')
+ so = st.empty()
+
+ with rd.stdout, rd.stderr(format='markdown', to=st.sidebar):
+     print("hello ")
+     time.sleep(1)
+     i = 5
+     while i > 0:
+         print("**M**izu? ", file=sys.stdout if getrandbits(1) else sys.stderr)
+         i -= 1
+         with rd.stdout(to=so):
+             print(f" cica {i}")
+         if i:
+             time.sleep(1)
+ # """
video_to_images.py ADDED
@@ -0,0 +1,49 @@
+ import os
+
+ import cv2
+ from tqdm import tqdm
+
+
+ class ImageCreator:
+     def __init__(self, filename, imgs_dir, image_start=0, image_end=0, pbar=True):
+         """
+         :param str filename: The video's filename.
+         :param str imgs_dir: The directory where to store the image files.
+         :param int image_start: The first image to be extracted.
+         :param int image_end: The last image to be extracted, 0 if full video.
+         :param bool pbar: Whether to display a progress bar.
+         """
+
+         self.filename = filename
+         self.imgs_dir = imgs_dir
+         self.image_start = image_start
+         self.image_end = image_end
+         self.pbar = pbar
+         if not os.path.exists(imgs_dir):
+             os.makedirs(imgs_dir)
+
+     def get_images(self):
+         vid = cv2.VideoCapture(self.filename)
+         total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
+         success, image = vid.read()
+         count = 0
+         if self.image_end == 0:
+             self.image_end = total_frames
+         zfill_max = len(str(total_frames))
+         ok_count = 0
+         print("Writing images...")
+         if self.pbar:
+             pb = tqdm(total=total_frames)
+         while success:
+             if count >= self.image_start and count <= self.image_end:
+                 cv2.imwrite(
+                     f"{self.imgs_dir}/frame_{str(ok_count).zfill(zfill_max)}.png",
+                     image,
+                 )
+                 ok_count += 1
+             success, image = vid.read()
+             if self.pbar:
+                 pb.update(1)
+             count += 1
+         if self.pbar:
+             pb.close()
+         print("Wrote {} image files.".format(ok_count))