Update sonic.py
sonic.py CHANGED
@@ -1,27 +1,29 @@
-…
+"""
+sonic.py – 2025-05 hot-fix
+Key changes
+• verify that config.pretrained_model_name_or_path is an actual local folder
+• if it is missing, download it automatically via huggingface_hub.snapshot_download
+• proceed with model loading once the path is ready
+"""
 import os, math, torch, cv2
-import torch.utils.checkpoint
 from PIL import Image
 from omegaconf import OmegaConf
-from tqdm import tqdm
-from diffusers import AutoencoderKLTemporalDecoder
-from diffusers.schedulers import EulerDiscreteScheduler
+from tqdm.auto import tqdm
+from diffusers import AutoencoderKLTemporalDecoder, EulerDiscreteScheduler
 from transformers import WhisperModel, CLIPVisionModelWithProjection, AutoFeatureExtractor
-
+from huggingface_hub import snapshot_download, hf_hub_download
 from src.utils.util import save_videos_grid, seed_everything
-from src.dataset.test_preprocess import …
-from src.models.base.unet_spatio_temporal_condition import (
-    …
-)
+from src.dataset.test_preprocess import process_bbox, image_audio_to_tensor
+from src.models.base.unet_spatio_temporal_condition import UNetSpatioTemporalConditionModel, add_ip_adapters
+from src.pipelines.pipeline_sonic import SonicPipeline
 from src.models.audio_adapter.audio_proj import AudioProjModel
 from src.models.audio_adapter.audio_to_bucket import Audio2bucketModel
-from src.pipelines.pipeline_sonic import SonicPipeline
 from src.utils.RIFE.RIFE_HDv3 import RIFEModel
 from src.dataset.face_align.align import AlignImage
-
+# ------------------------------
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+HF_STABLE_REPO = "stabilityai/stable-video-diffusion-img2vid-xt"
+LOCAL_STABLE_DIR = os.path.join(BASE_DIR, "checkpoints", "stable-video-diffusion-img2vid-xt")
 
 
 # ------------------------------------------------------------------
@@ -123,19 +125,30 @@ def test(pipe, cfg, wav_enc, audio_pe, audio2bucket, img_enc,
 # ------------------------------------------------------------------
 # Sonic wrapper
 # ------------------------------------------------------------------
+
 class Sonic:
     config_file = os.path.join(BASE_DIR, "config/inference/sonic.yaml")
     config = OmegaConf.load(config_file)
 
-    def __init__(self, device_id=0, enable_interpolate_frame=True):
+    def __init__(self, device_id: int = 0, enable_interpolate_frame: bool = True):
         cfg = self.config
         cfg.use_interframe = enable_interpolate_frame
-        self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id>=0 else "cpu"
-        …
+        self.device = f"cuda:{device_id}" if torch.cuda.is_available() and device_id >= 0 else "cpu"
+
+        # ----------- ✨ [NEW] ensure the pretrained model folder exists --------------
+        if not os.path.isdir(LOCAL_STABLE_DIR) or not os.path.isfile(os.path.join(LOCAL_STABLE_DIR, "vae", "config.json")):
+            print("[INFO] 1st-run – downloading base model (~2 GB)…")
+            snapshot_download(repo_id=HF_STABLE_REPO,
+                              local_dir=LOCAL_STABLE_DIR,
+                              resume_download=True,
+                              local_dir_use_symlinks=False)
+        cfg.pretrained_model_name_or_path = LOCAL_STABLE_DIR
+        # ------------------------------------------------------------------
 
         self._load_models(cfg)
         print("Sonic init done")
 
+
     # model-loader (unchanged, but with tiny clean-ups) ------------------------
     def _load_models(self, cfg):
         dtype = {"fp16": torch.float16, "fp32": torch.float32, "bf16": torch.bfloat16}[cfg.weight_dtype]
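On recent huggingface_hub releases, resume_download and local_dir_use_symlinks are deprecated no-ops (downloads resume automatically, and local_dir downloads no longer use symlinks), so the guard-then-download step in this patch reduces to the standalone sketch below. This is a minimal sketch and not part of the commit; the constants mirror the ones the patch defines.

import os
from huggingface_hub import snapshot_download

HF_STABLE_REPO = "stabilityai/stable-video-diffusion-img2vid-xt"
LOCAL_STABLE_DIR = os.path.join("checkpoints", "stable-video-diffusion-img2vid-xt")

# Probe vae/config.json rather than the directory alone, as the patch does:
# this also catches a first download that was interrupted partway through.
if not os.path.isfile(os.path.join(LOCAL_STABLE_DIR, "vae", "config.json")):
    snapshot_download(repo_id=HF_STABLE_REPO, local_dir=LOCAL_STABLE_DIR)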
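With the hot-fix applied, first use needs no manual checkpoint setup. A minimal usage sketch, assuming the file sits at the repository root as sonic.py (only the constructor shown in this diff is exercised):

from sonic import Sonic

# The first instantiation triggers the one-off ~2 GB snapshot_download into
# checkpoints/stable-video-diffusion-img2vid-xt; later runs find the
# vae/config.json probe file and go straight to _load_models().
pipe = Sonic(device_id=0, enable_interpolate_frame=True)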