Update sonic.py
sonic.py CHANGED
@@ -1,27 +1,29 @@
-…
+"""
+sonic.py – 2025-05 hot-fix
+Key changes
+• verify that config.pretrained_model_name_or_path is an actual local folder
+• if it is missing, download it automatically via huggingface_hub.snapshot_download
+• proceed with model loading once the path is ready
+"""
 import os, math, torch, cv2
-import torch.utils.checkpoint
 from PIL import Image
 from omegaconf import OmegaConf
-from tqdm import tqdm
-from diffusers import AutoencoderKLTemporalDecoder
-from diffusers.schedulers import EulerDiscreteScheduler
+from tqdm.auto import tqdm
+from diffusers import AutoencoderKLTemporalDecoder, EulerDiscreteScheduler
 from transformers import WhisperModel, CLIPVisionModelWithProjection, AutoFeatureExtractor
-
+from huggingface_hub import snapshot_download, hf_hub_download
 from src.utils.util import save_videos_grid, seed_everything
-from src.dataset.test_preprocess import …
-from src.models.base.unet_spatio_temporal_condition import (
-    …
-)
+from src.dataset.test_preprocess import process_bbox, image_audio_to_tensor
+from src.models.base.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel, add_ip_adapters
+from src.pipelines.pipeline_sonic import SonicPipeline
 from src.models.audio_adapter.audio_proj import AudioProjModel
 from src.models.audio_adapter.audio_to_bucket import Audio2bucketModel
-from src.pipelines.pipeline_sonic import SonicPipeline
 from src.utils.RIFE.RIFE_HDv3 import RIFEModel
 from src.dataset.face_align.align import AlignImage
-
+# ------------------------------
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+HF_STABLE_REPO = "stabilityai/stable-video-diffusion-img2vid-xt"
+LOCAL_STABLE_DIR = os.path.join(BASE_DIR, "checkpoints", "stable-video-diffusion-img2vid-xt")
 
 
 # ------------------------------------------------------------------
@@ -123,19 +125,30 @@ def test(pipe, cfg, wav_enc, audio_pe, audio2bucket, img_enc,
 # ------------------------------------------------------------------
 # Sonic wrapper
 # ------------------------------------------------------------------
+
 class Sonic:
     config_file = os.path.join(BASE_DIR, "config/inference/sonic.yaml")
     config = OmegaConf.load(config_file)
 
-    def __init__(self, device_id=0, enable_interpolate_frame=True):
+    def __init__(self, device_id: int = 0, enable_interpolate_frame: bool = True):
         cfg = self.config
         cfg.use_interframe = enable_interpolate_frame
-        self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id>=0 else "cpu"
-        …
+        self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id >= 0 else "cpu"
+
+        # ----------- ✨ [NEW] ensure the pretrained model folder exists --------------
+        if not os.path.isdir(LOCAL_STABLE_DIR) or not os.path.isfile(os.path.join(LOCAL_STABLE_DIR, "vae", "config.json")):
+            print("[INFO] 1st-run – downloading base model (~2 GB)…")
+            snapshot_download(repo_id=HF_STABLE_REPO,
+                              local_dir=LOCAL_STABLE_DIR,
+                              resume_download=True,
+                              local_dir_use_symlinks=False)
+        cfg.pretrained_model_name_or_path = LOCAL_STABLE_DIR
+        # ------------------------------------------------------------------
 
         self._load_models(cfg)
         print("Sonic init done")
 
+
     # model-loader (unchanged, but with tiny clean-ups) ------------------------
     def _load_models(self, cfg):
         dtype = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}[cfg.weight_dtype]
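On recent huggingface_hub releases, resume_download and local_dir_use_symlinks are deprecated no-ops (downloads resume automatically, and local_dir downloads no longer use symlinks), so the guard-then-download step in this patch reduces to the standalone sketch below. This is a minimal sketch and not part of the commit; the constants mirror the ones the patch defines.

import os
from huggingface_hub import snapshot_download

HF_STABLE_REPO = "stabilityai/stable-video-diffusion-img2vid-xt"
LOCAL_STABLE_DIR = os.path.join("checkpoints", "stable-video-diffusion-img2vid-xt")

# Probe vae/config.json rather than the directory alone, as the patch does:
# this also catches a first download that was interrupted partway through.
if not os.path.isfile(os.path.join(LOCAL_STABLE_DIR, "vae", "config.json")):
    snapshot_download(repo_id=HF_STABLE_REPO, local_dir=LOCAL_STABLE_DIR)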
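With the hot-fix applied, first use needs no manual checkpoint setup. A minimal usage sketch, assuming the file sits at the repository root as sonic.py (only the constructor shown in this diff is exercised):

from sonic import Sonic

# The first instantiation triggers the one-off ~2 GB snapshot_download into
# checkpoints/stable-video-diffusion-img2vid-xt; later runs find the
# vae/config.json probe file and go straight to _load_models().
pipe = Sonic(device_id=0, enable_interpolate_frame=True)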