Muhammad Taqi Raza
committed on
Commit
·
fd926cd
1
Parent(s):
7597134
gradio
Browse files
- Dockerfile +5 -5
- download/download.py +4 -4
- download/download_models.sh +3 -3
- gradio_app.py +10 -10
- inference/v2v_data/get_anchor_videos.sh +1 -1
- inference/v2v_data/inference.py +5 -5
Dockerfile
CHANGED
@@ -3,8 +3,8 @@ FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
|
|
3 |
SHELL ["/bin/bash", "-c"]
|
4 |
|
5 |
# Environment variables for Hugging Face cache
|
6 |
-
ENV HF_HOME=/
|
7 |
-
ENV TRANSFORMERS_CACHE=/
|
8 |
ENV HF_TOKEN=${HF_TOKEN}
|
9 |
ENV PATH=/opt/conda/bin:$PATH
|
10 |
# Install system dependencies
|
@@ -19,16 +19,16 @@ WORKDIR /app
|
|
19 |
COPY . /app
|
20 |
|
21 |
# Fix permissions for all subdirectories
|
22 |
-
RUN mkdir -p /
|
23 |
chmod -R 777 /app && \
|
24 |
chmod -R 777 /.cache && \
|
25 |
chmod -R 777 /root
|
26 |
|
27 |
# Create conda environment and install dependencies
|
28 |
-
COPY requirements.txt /
|
29 |
RUN conda create -n epic python=3.10 -y && \
|
30 |
conda run -n epic pip install --upgrade pip && \
|
31 |
-
conda run -n epic pip install -r /
|
32 |
|
33 |
RUN chmod -R 777 /app /workspace
|
34 |
|
|
|
3 |
SHELL ["/bin/bash", "-c"]
|
4 |
|
5 |
# Environment variables for Hugging Face cache
|
6 |
+
ENV HF_HOME=/app/hf_cache
|
7 |
+
ENV TRANSFORMERS_CACHE=/app/hf_cache
|
8 |
ENV HF_TOKEN=${HF_TOKEN}
|
9 |
ENV PATH=/opt/conda/bin:$PATH
|
10 |
# Install system dependencies
|
|
|
19 |
COPY . /app
|
20 |
|
21 |
# Fix permissions for all subdirectories
|
22 |
+
RUN mkdir -p /app/pretrained /app/hf_cache /.cache/gdown && \
|
23 |
chmod -R 777 /app && \
|
24 |
chmod -R 777 /.cache && \
|
25 |
chmod -R 777 /root
|
26 |
|
27 |
# Create conda environment and install dependencies
|
28 |
+
COPY requirements.txt /app/requirements.txt
|
29 |
RUN conda create -n epic python=3.10 -y && \
|
30 |
conda run -n epic pip install --upgrade pip && \
|
31 |
+
conda run -n epic pip install -r /app/requirements.txt
|
32 |
|
33 |
RUN chmod -R 777 /app /workspace
|
34 |
|
download/download.py
CHANGED
@@ -3,22 +3,22 @@ from huggingface_hub import snapshot_download
|
|
3 |
def download_model():
|
4 |
snapshot_download(
|
5 |
repo_id="tencent/DepthCrafter",
|
6 |
-
local_dir="/
|
7 |
local_dir_use_symlinks=False,
|
8 |
)
|
9 |
snapshot_download(
|
10 |
repo_id="stabilityai/stable-video-diffusion-img2vid",
|
11 |
-
local_dir="/
|
12 |
local_dir_use_symlinks=False,
|
13 |
)
|
14 |
snapshot_download(
|
15 |
repo_id= "Qwen/Qwen2.5-VL-7B-Instruct",
|
16 |
-
local_dir="/
|
17 |
local_dir_use_symlinks=False,
|
18 |
)
|
19 |
snapshot_download(
|
20 |
repo_id="THUDM/CogVideoX1.5-5B-SAT",
|
21 |
-
local_dir="/
|
22 |
local_dir_use_symlinks=False,
|
23 |
)
|
24 |
|
|
|
3 |
def download_model():
|
4 |
snapshot_download(
|
5 |
repo_id="tencent/DepthCrafter",
|
6 |
+
local_dir="/app/pretrained/DepthCrafter",
|
7 |
local_dir_use_symlinks=False,
|
8 |
)
|
9 |
snapshot_download(
|
10 |
repo_id="stabilityai/stable-video-diffusion-img2vid",
|
11 |
+
local_dir="/app/pretrained/stable-video-diffusion-img2vid",
|
12 |
local_dir_use_symlinks=False,
|
13 |
)
|
14 |
snapshot_download(
|
15 |
repo_id= "Qwen/Qwen2.5-VL-7B-Instruct",
|
16 |
+
local_dir="/app/pretrained/Qwen2.5-VL-7B-Instruct",
|
17 |
local_dir_use_symlinks=False,
|
18 |
)
|
19 |
snapshot_download(
|
20 |
repo_id="THUDM/CogVideoX1.5-5B-SAT",
|
21 |
+
local_dir="/app/pretrained/CogVideoX-5b-I2V",
|
22 |
local_dir_use_symlinks=False,
|
23 |
)
|
24 |
|
download/download_models.sh
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
mkdir -p /
|
2 |
-
gdown 1MqDajR89k-xLV0HIrmJ0k-n8ZpG6_suM -O /
|
3 |
-
python /
|
|
|
1 |
+
mkdir -p /app/pretrained/RAFT
|
2 |
+
gdown 1MqDajR89k-xLV0HIrmJ0k-n8ZpG6_suM -O /app/pretrained/RAFT/raft-things.pth
|
3 |
+
python /app/download/download.py
|
gradio_app.py
CHANGED
@@ -8,12 +8,12 @@ import os
|
|
8 |
# -----------------------------
|
9 |
# Setup paths and env
|
10 |
# -----------------------------
|
11 |
-
HF_HOME = "/
|
12 |
os.environ["HF_HOME"] = HF_HOME
|
13 |
os.environ["TRANSFORMERS_CACHE"] = HF_HOME
|
14 |
os.makedirs(HF_HOME, exist_ok=True)
|
15 |
|
16 |
-
PRETRAINED_DIR = "/
|
17 |
os.makedirs(PRETRAINED_DIR, exist_ok=True)
|
18 |
|
19 |
# -----------------------------
|
@@ -43,8 +43,8 @@ def get_anchor_video(video_path, fps, num_frames, target_pose, mode,
|
|
43 |
seed_input, height, width, aspect_ratio_inputs,
|
44 |
init_dx, init_dy, init_dz):
|
45 |
|
46 |
-
temp_input_path = "/
|
47 |
-
output_dir = "/
|
48 |
video_output_path = f"{output_dir}/masked_videos/output.mp4"
|
49 |
|
50 |
if video_path:
|
@@ -59,7 +59,7 @@ def get_anchor_video(video_path, fps, num_frames, target_pose, mode,
|
|
59 |
h_s, w_s = sample_size.strip().split(",")
|
60 |
|
61 |
command = [
|
62 |
-
"python", "/
|
63 |
"--video_path", temp_input_path,
|
64 |
"--stride", "1",
|
65 |
"--out_dir", output_dir,
|
@@ -111,13 +111,13 @@ def inference(
|
|
111 |
seed, height, width, downscale_coef, vae_channels,
|
112 |
controlnet_input_channels, controlnet_transformer_num_layers
|
113 |
):
|
114 |
-
model_path = "/
|
115 |
-
ckpt_path = "/
|
116 |
-
video_root_dir = "/
|
117 |
-
out_dir = "/
|
118 |
|
119 |
command = [
|
120 |
-
"python", "/
|
121 |
"--video_root_dir", video_root_dir,
|
122 |
"--base_model_path", model_path,
|
123 |
"--controlnet_model_path", ckpt_path,
|
|
|
8 |
# -----------------------------
|
9 |
# Setup paths and env
|
10 |
# -----------------------------
|
11 |
+
HF_HOME = "/app/hf_cache"
|
12 |
os.environ["HF_HOME"] = HF_HOME
|
13 |
os.environ["TRANSFORMERS_CACHE"] = HF_HOME
|
14 |
os.makedirs(HF_HOME, exist_ok=True)
|
15 |
|
16 |
+
PRETRAINED_DIR = "/app/pretrained"
|
17 |
os.makedirs(PRETRAINED_DIR, exist_ok=True)
|
18 |
|
19 |
# -----------------------------
|
|
|
43 |
seed_input, height, width, aspect_ratio_inputs,
|
44 |
init_dx, init_dy, init_dz):
|
45 |
|
46 |
+
temp_input_path = "/app/temp_input.mp4"
|
47 |
+
output_dir = "/app/output_anchor"
|
48 |
video_output_path = f"{output_dir}/masked_videos/output.mp4"
|
49 |
|
50 |
if video_path:
|
|
|
59 |
h_s, w_s = sample_size.strip().split(",")
|
60 |
|
61 |
command = [
|
62 |
+
"python", "/app/inference/v2v_data/inference.py",
|
63 |
"--video_path", temp_input_path,
|
64 |
"--stride", "1",
|
65 |
"--out_dir", output_dir,
|
|
|
111 |
seed, height, width, downscale_coef, vae_channels,
|
112 |
controlnet_input_channels, controlnet_transformer_num_layers
|
113 |
):
|
114 |
+
model_path = "/app/pretrained/CogVideoX-5b-I2V"
|
115 |
+
ckpt_path = "/app/out/EPiC_pretrained/checkpoint-500.pt"
|
116 |
+
video_root_dir = "/app/output_anchor"
|
117 |
+
out_dir = "/app/output"
|
118 |
|
119 |
command = [
|
120 |
+
"python", "/app/inference/cli_demo_camera_i2v_pcd.py",
|
121 |
"--video_root_dir", video_root_dir,
|
122 |
"--base_model_path", model_path,
|
123 |
"--controlnet_model_path", ckpt_path,
|
inference/v2v_data/get_anchor_videos.sh
CHANGED
@@ -6,7 +6,7 @@ target_pose_str="0_30_-0.6_0_0"
|
|
6 |
traj_name="loop1"
|
7 |
traj_txt="test/trajs/${traj_name}.txt"
|
8 |
|
9 |
-
video="/
|
10 |
|
11 |
processed_data_name=$1
|
12 |
# filename=$(basename "$video" .mp4)
|
|
|
6 |
traj_name="loop1"
|
7 |
traj_txt="test/trajs/${traj_name}.txt"
|
8 |
|
9 |
+
video="/app/data/test_v2v/videos/amalfi-coast_traj_loop2.mp4"
|
10 |
|
11 |
processed_data_name=$1
|
12 |
# filename=$(basename "$video" .mp4)
|
inference/v2v_data/inference.py
CHANGED
@@ -99,7 +99,7 @@ def get_parser():
|
|
99 |
parser.add_argument(
|
100 |
'--model_name',
|
101 |
type=str,
|
102 |
-
default='/
|
103 |
help='Path to the model',
|
104 |
)
|
105 |
parser.add_argument(
|
@@ -113,7 +113,7 @@ def get_parser():
|
|
113 |
parser.add_argument(
|
114 |
'--transformer_path',
|
115 |
type=str,
|
116 |
-
default="/
|
117 |
help='Path to the pretrained transformer model',
|
118 |
)
|
119 |
parser.add_argument(
|
@@ -150,14 +150,14 @@ def get_parser():
|
|
150 |
default=". The video is of high quality, and the view is very clear. ",
|
151 |
help='Prompt for video generation',
|
152 |
)
|
153 |
-
parser.add_argument('--qwen_path', type=str, default="/
|
154 |
|
155 |
## depth
|
156 |
# parser.add_argument('--unet_path', type=str, default='checkpoints/DepthCrafter', help='Path to the UNet model')
|
157 |
parser.add_argument(
|
158 |
'--unet_path',
|
159 |
type=str,
|
160 |
-
default="/
|
161 |
help='Path to the UNet model',
|
162 |
)
|
163 |
|
@@ -165,7 +165,7 @@ def get_parser():
|
|
165 |
parser.add_argument(
|
166 |
'--pre_train_path',
|
167 |
type=str,
|
168 |
-
default="/
|
169 |
help='Path to the pre-trained model',
|
170 |
)
|
171 |
parser.add_argument(
|
|
|
99 |
parser.add_argument(
|
100 |
'--model_name',
|
101 |
type=str,
|
102 |
+
default='/app/pretrained/CogVideoX-Fun-V1.1-5b-InP',
|
103 |
help='Path to the model',
|
104 |
)
|
105 |
parser.add_argument(
|
|
|
113 |
parser.add_argument(
|
114 |
'--transformer_path',
|
115 |
type=str,
|
116 |
+
default="/app/pretrained/TrajectoryCrafter",
|
117 |
help='Path to the pretrained transformer model',
|
118 |
)
|
119 |
parser.add_argument(
|
|
|
150 |
default=". The video is of high quality, and the view is very clear. ",
|
151 |
help='Prompt for video generation',
|
152 |
)
|
153 |
+
parser.add_argument('--qwen_path', type=str, default="/app/pretrained/Qwen2.5-VL-7B-Instruct")
|
154 |
|
155 |
## depth
|
156 |
# parser.add_argument('--unet_path', type=str, default='checkpoints/DepthCrafter', help='Path to the UNet model')
|
157 |
parser.add_argument(
|
158 |
'--unet_path',
|
159 |
type=str,
|
160 |
+
default="/app/pretrained/DepthCrafter",
|
161 |
help='Path to the UNet model',
|
162 |
)
|
163 |
|
|
|
165 |
parser.add_argument(
|
166 |
'--pre_train_path',
|
167 |
type=str,
|
168 |
+
default="/app/pretrained/stable-video-diffusion-img2vid",
|
169 |
help='Path to the pre-trained model',
|
170 |
)
|
171 |
parser.add_argument(
|