Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,619 Bytes
ff37225 9d31513 ff37225 2279f85 ff37225 9132603 ff37225 9132603 ff37225 2279f85 ff37225 2279f85 ff37225 2279f85 ff37225 2279f85 ff37225 2279f85 ff37225 2279f85 ff37225 2279f85 ff37225 2279f85 ff37225 9d31513 ff37225 2279f85 9d31513 ff37225 2279f85 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# app.py
import os, io, hashlib, spaces, gradio as gr
from pydub import AudioSegment
from PIL import Image
import numpy as np
from sonic import Sonic # <-- 수정된 sonic.py 사용
# ------------------------------------------------------------------
# 1. Install required libraries & download model checkpoints
# ------------------------------------------------------------------
# One shell command per entry; none of them depends on shell state
# (working directory, variables) set by another.
_SETUP_STEPS = (
    'python -m pip install "huggingface_hub[cli]" accelerate',
    'huggingface-cli download LeonJoe13/Sonic '
    ' --local-dir checkpoints/Sonic --local-dir-use-symlinks False',
    'huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt '
    ' --local-dir checkpoints/stable-video-diffusion-img2vid-xt --local-dir-use-symlinks False',
    'huggingface-cli download openai/whisper-tiny '
    ' --local-dir checkpoints/whisper-tiny --local-dir-use-symlinks False',
)

# Kept for backward compatibility: the same commands joined into a single
# shell line, each one terminated by "; ".
SETUP_CMD = "".join(f"{step}; " for step in _SETUP_STEPS)


def _run_setup(steps=_SETUP_STEPS):
    """Run every setup command through the shell, best-effort.

    A failing command does not stop the remaining ones, but its non-zero
    status is reported instead of being silently discarded.

    Args:
        steps: Iterable of shell command strings to execute in order.

    Returns:
        The list of commands whose ``os.system`` status was non-zero.
    """
    failed = []
    for step in steps:
        status = os.system(step)
        if status != 0:
            print(f"[WARN] Setup step failed (status={status}): {step}")
            failed.append(step)
    return failed


_run_setup()
# ------------------------------------------------------------------
# 2. Pipeline initialisation (GPU, done only once)
# ------------------------------------------------------------------
# Module-level Sonic instance, created once at import time and shared by
# every call to _render_video.
# NOTE(review): presumably needs the checkpoints fetched by the setup step
# above to be present on disk - confirm against sonic.py.
pipe = Sonic()  # initialised with the earlier error resolved
# ------------------------------------------------------------------
# 3. 유틸리티
# ------------------------------------------------------------------
def _md5(b: bytes) -> str:
return hashlib.md5(b).hexdigest()
# Working directories: cached inputs go to TMP_DIR, rendered videos to RES_DIR.
TMP_DIR, RES_DIR = "tmp_path", "res_path"
for _cache_dir in (TMP_DIR, RES_DIR):
    # exist_ok=True makes a restart with existing directories a no-op.
    os.makedirs(_cache_dir, exist_ok=True)
# ------------------------------------------------------------------
# 4. Actual video generation (GPU task)
# ------------------------------------------------------------------
@spaces.GPU(duration=600)  # up to 10 minutes
def _render_video(img_path: str,
                  audio_path: str,
                  out_path: str,
                  dynamic_scale: float = 1.0) -> str | int:
    """Render a video for one image/audio pair with the shared pipeline.

    The number of inference steps is derived from the audio length:
    12.5 steps per second, clamped to the range 25-750.

    Args:
        img_path: Path of the image passed to ``pipe.preprocess`` and
            ``pipe.process``.
        audio_path: Path of the audio file; loaded with pydub to measure
            its duration and passed on to ``pipe.process``.
        out_path: Destination path handed to ``pipe.process``. Its parent
            directory is created when it has one.
        dynamic_scale: Forwarded unchanged to ``pipe.process``.

    Returns:
        ``out_path`` on success, or ``-1`` when face detection reports
        zero faces.
    """
    min_resolution = 512

    audio = AudioSegment.from_file(audio_path)
    duration_sec = len(audio) / 1000.0  # pydub lengths are in milliseconds
    steps = int(np.clip(duration_sec * 12.5, 25, 750))
    print(f"[INFO] Audio duration={duration_sec:.2f}s → inference_steps={steps}")

    face_info = pipe.preprocess(img_path)
    print(f"[INFO] Face detection info: {face_info}")
    if face_info["face_num"] == 0:
        return -1  # no face detected

    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    pipe.process(
        img_path, audio_path, out_path,
        min_resolution=min_resolution,
        inference_steps=steps,
        dynamic_scale=dynamic_scale,
    )
    return out_path
# ------------------------------------------------------------------
# 5. Gradio wrapper
# ------------------------------------------------------------------
def _cache_bytes(payload: bytes, suffix: str) -> tuple[str, str]:
    """Store *payload* in TMP_DIR under its MD5 digest; return (digest, path).

    The file is written only when no file with that name exists yet.
    """
    digest = _md5(payload)
    path = os.path.join(TMP_DIR, f"{digest}{suffix}")
    if not os.path.exists(path):
        with open(path, "wb") as f:
            f.write(payload)
    return digest, path


def run_sonic(image, audio, dynamic_scale):
    """Gradio callback: cache the inputs, then render (or reuse) the video.

    Args:
        image: Image object exposing ``save(buffer, format=...)`` (the UI
            supplies a PIL image), or ``None`` when nothing was uploaded.
        audio: Sequence whose first two items are the sample rate and a
            NumPy sample array, or ``None`` when nothing was uploaded.
        dynamic_scale: Animation intensity; forwarded to the renderer and
            included, rounded to one decimal, in the result file name.

    Returns:
        Path of the generated (or previously cached) ``.mp4`` file.

    Raises:
        gr.Error: If the image or audio is missing, the audio contains no
            samples, or no face is detected in the image.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    if audio is None:
        raise gr.Error("Please upload an audio file.")

    # -- image cache: PNG bytes keyed by their MD5 digest --------------
    buf_i = io.BytesIO()
    image.save(buf_i, format="PNG")
    img_hash, img_path = _cache_bytes(buf_i.getvalue(), ".png")

    # -- audio cache (mono / 16 kHz, at most 60 s) ---------------------
    rate, arr = audio[:2]
    if arr.size == 0:
        raise gr.Error("The audio clip is empty.")
    if arr.ndim == 1:
        arr = arr[:, None]  # treat 1-D input as a single channel
    seg = AudioSegment(arr.tobytes(), frame_rate=rate,
                       sample_width=arr.dtype.itemsize, channels=arr.shape[1])
    seg = seg.set_channels(1).set_frame_rate(16000)[:60_000]
    buf_a = io.BytesIO()
    seg.export(buf_a, format="wav")
    aud_hash, aud_path = _cache_bytes(buf_a.getvalue(), ".wav")

    # -- result path: one file per (image, audio, scale) combination ---
    out_path = os.path.join(
        RES_DIR, f"{img_hash}_{aud_hash}_{dynamic_scale:.1f}.mp4"
    )
    if os.path.exists(out_path):
        print(f"[INFO] Cache hit → {out_path}")
        return out_path

    print(f"[INFO] Generating video (dynamic_scale={dynamic_scale}) …")
    result = _render_video(img_path, aud_path, out_path, dynamic_scale)
    if result == -1:
        # -1 is the renderer's "no face detected" sentinel; show it as a
        # user-facing error instead of passing it to the video output.
        raise gr.Error(
            "No face detected in the image. Please upload a clear portrait."
        )
    return result
# ------------------------------------------------------------------
# 6. Gradio UI
# ------------------------------------------------------------------
CSS = """
.gradio-container{font-family:Arial, sans-serif}
.main-header{text-align:center;color:#2a2a2a;margin-bottom:2em}
"""

# Page header markup, styled by the .main-header rule in CSS.
_HEADER_HTML = """
<div class="main-header">
<h1>🎭 Sonic - Portrait Animation</h1>
<p>Turn a single photo into a talking-head video (≤1 min audio)</p>
</div>"""

with gr.Blocks(css=CSS) as demo:
    gr.HTML(value=_HEADER_HTML)

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            img_in = gr.Image(label="Portrait Image", type="pil")
            aud_in = gr.Audio(type="numpy", label="Voice / Audio (≤60 s)")
            scale = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Animation Intensity",
            )
            btn = gr.Button(value="Generate Animation", variant="primary")

        # Right column: the rendered video.
        with gr.Column():
            vid_out = gr.Video(label="Result")

    # run_sonic receives (image, audio, scale) and returns the video path.
    btn.click(
        fn=run_sonic,
        inputs=[img_in, aud_in, scale],
        outputs=vid_out,
    )

demo.launch(share=True)
|