openfree committed on
Commit
c260fe0
·
verified ·
1 Parent(s): 2279f85

Update sonic.py

Browse files
Files changed (1) hide show
  1. sonic.py +30 -17
sonic.py CHANGED
@@ -1,27 +1,29 @@
1
- # ---------------------------------------------------------
2
- # sonic.py (2025-05 rev – fix AudioProjModel tensor shape)
3
- # ---------------------------------------------------------
 
 
 
 
4
  import os, math, torch, cv2
5
- import torch.utils.checkpoint
6
  from PIL import Image
7
  from omegaconf import OmegaConf
8
- from tqdm import tqdm
9
- from diffusers import AutoencoderKLTemporalDecoder
10
- from diffusers.schedulers import EulerDiscreteScheduler
11
  from transformers import WhisperModel, CLIPVisionModelWithProjection, AutoFeatureExtractor
12
-
13
  from src.utils.util import save_videos_grid, seed_everything
14
- from src.dataset.test_preprocess import image_audio_to_tensor, process_bbox
15
- from src.models.base.unet_spatio_temporal_condition import (
16
- UNetSpatioTemporalConditionModel, add_ip_adapters,
17
- )
18
  from src.models.audio_adapter.audio_proj import AudioProjModel
19
  from src.models.audio_adapter.audio_to_bucket import Audio2bucketModel
20
- from src.pipelines.pipeline_sonic import SonicPipeline
21
  from src.utils.RIFE.RIFE_HDv3 import RIFEModel
22
  from src.dataset.face_align.align import AlignImage
23
-
24
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
25
 
26
 
27
  # ------------------------------------------------------------------
@@ -123,19 +125,30 @@ def test(pipe, cfg, wav_enc, audio_pe, audio2bucket, img_enc,
123
  # ------------------------------------------------------------------
124
  # Sonic wrapper
125
  # ------------------------------------------------------------------
 
126
  class Sonic:
127
  config_file = os.path.join(BASE_DIR, "config/inference/sonic.yaml")
128
  config = OmegaConf.load(config_file)
129
 
130
- def __init__(self, device_id=0, enable_interpolate_frame=True):
131
  cfg = self.config
132
  cfg.use_interframe = enable_interpolate_frame
133
- self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id>=0 else "cpu"
134
- cfg.pretrained_model_name_or_path = os.path.join(BASE_DIR, cfg.pretrained_model_name_or_path)
 
 
 
 
 
 
 
 
 
135
 
136
  self._load_models(cfg)
137
  print("Sonic init done")
138
 
 
139
  # model-loader (unchanged, but with tiny clean-ups) ------------------------
140
  def _load_models(self, cfg):
141
  dtype = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}[cfg.weight_dtype]
 
1
+ """
2
+ sonic.py 2025-05 hot-fix
3
+ 주요 수정
4
+ • config.pretrained_model_name_or_path 가 실제 폴더인지 확인
5
+ • 없다면 huggingface_hub.snapshot_download 로 자동 다운로드
6
+ • 경로가 준비된 뒤 모델 로드 진행
7
+ """
8
  import os, math, torch, cv2
 
9
  from PIL import Image
10
  from omegaconf import OmegaConf
11
+ from tqdm.auto import tqdm
12
+ from diffusers import AutoencoderKLTemporalDecoder, EulerDiscreteScheduler
 
13
  from transformers import WhisperModel, CLIPVisionModelWithProjection, AutoFeatureExtractor
14
+ from huggingface_hub import snapshot_download, hf_hub_download
15
  from src.utils.util import save_videos_grid, seed_everything
16
+ from src.dataset.test_preprocess import process_bbox, image_audio_to_tensor
17
+ from src.models.base.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel, add_ip_adapters
18
+ from src.pipelines.pipeline_sonic import SonicPipeline
 
19
  from src.models.audio_adapter.audio_proj import AudioProjModel
20
  from src.models.audio_adapter.audio_to_bucket import Audio2bucketModel
 
21
  from src.utils.RIFE.RIFE_HDv3 import RIFEModel
22
  from src.dataset.face_align.align import AlignImage
23
+ # ------------------------------
24
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
+ HF_STABLE_REPO = "stabilityai/stable-video-diffusion-img2vid-xt"
26
+ LOCAL_STABLE_DIR = os.path.join(BASE_DIR, "checkpoints", "stable-video-diffusion-img2vid-xt")
27
 
28
 
29
  # ------------------------------------------------------------------
 
125
  # ------------------------------------------------------------------
126
  # Sonic wrapper
127
  # ------------------------------------------------------------------
128
+
129
  class Sonic:
130
  config_file = os.path.join(BASE_DIR, "config/inference/sonic.yaml")
131
  config = OmegaConf.load(config_file)
132
 
133
+ def __init__(self, device_id: int = 0, enable_interpolate_frame: bool = True):
134
  cfg = self.config
135
  cfg.use_interframe = enable_interpolate_frame
136
+ self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id >= 0 else "cpu"
137
+
138
+ # ----------- ✨ [NEW] pretrained 모델 폴더 확보 ----------------------
139
+ if not os.path.isdir(LOCAL_STABLE_DIR) or not os.path.isfile(os.path.join(LOCAL_STABLE_DIR, "vae", "config.json")):
140
+ print("[INFO] 1st-run – downloading base model (~2 GB)…")
141
+ snapshot_download(repo_id=HF_STABLE_REPO,
142
+ local_dir=LOCAL_STABLE_DIR,
143
+ resume_download=True,
144
+ local_dir_use_symlinks=False)
145
+ cfg.pretrained_model_name_or_path = LOCAL_STABLE_DIR
146
+ # ------------------------------------------------------------------
147
 
148
  self._load_models(cfg)
149
  print("Sonic init done")
150
 
151
+
152
  # model-loader (unchanged, but with tiny clean-ups) ------------------------
153
  def _load_models(self, cfg):
154
  dtype = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}[cfg.weight_dtype]