openfree committed
Commit 0537b34 · verified · 1 Parent(s): 857fa09

Update app.py

Files changed (1)
  1. app.py +39 -40
app.py CHANGED
@@ -9,18 +9,19 @@ from PIL import Image
 import torch
 
 # Initialize the model
-cmd = 'python3 -m pip install "huggingface_hub[cli]"; \
-huggingface-cli download LeonJoe13/Sonic --local-dir checkpoints; \
-huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir checkpoints/stable-video-diffusion-img2vid-xt; \
-huggingface-cli download openai/whisper-tiny --local-dir checkpoints/whisper-tiny;'
+cmd = (
+    'python3 -m pip install "huggingface_hub[cli]"; '
+    'huggingface-cli download LeonJoe13/Sonic --local-dir checkpoints; '
+    'huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --local-dir checkpoints/stable-video-diffusion-img2vid-xt; '
+    'huggingface-cli download openai/whisper-tiny --local-dir checkpoints/whisper-tiny;'
+)
 os.system(cmd)
 
 pipe = Sonic()
 
 def get_md5(content):
     md5hash = hashlib.md5(content)
-    md5 = md5hash.hexdigest()
-    return md5
+    return md5hash.hexdigest()
 
 @spaces.GPU(duration=300) # Increased duration to handle longer videos
 def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
@@ -28,9 +29,9 @@ def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
     min_resolution = 512
     inference_steps = 25
 
-    # Get audio duration
+    # Get audio duration (for logging)
     audio = AudioSegment.from_file(audio_path)
-    duration = len(audio) / 1000.0  # Convert to seconds
+    duration = len(audio) / 1000.0  # convert ms to seconds
 
     face_info = pipe.preprocess(img_path, expand_ratio=expand_ratio)
     print(f"Face detection info: {face_info}")
@@ -42,15 +43,14 @@ def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
         img_path = crop_image_path
         os.makedirs(os.path.dirname(res_video_path), exist_ok=True)
 
-        # Process with full audio duration
+        # NOTE: Sonic.process() no longer accepts a duration argument, so it is not passed here.
         pipe.process(
-            img_path,
-            audio_path,
-            res_video_path,
+            img_path,
+            audio_path,
+            res_video_path,
             min_resolution=min_resolution,
             inference_steps=inference_steps,
-            dynamic_scale=dynamic_scale,
-            duration=duration  # Pass the actual duration
+            dynamic_scale=dynamic_scale
         )
     else:
         return -1
@@ -61,21 +61,21 @@ os.makedirs(tmp_path, exist_ok=True)
 os.makedirs(res_path, exist_ok=True)
 
 def process_sonic(image, audio, dynamic_scale):
-    # Input validation
+    # Validate inputs
     if image is None:
         raise gr.Error("Please upload an image")
     if audio is None:
         raise gr.Error("Please upload an audio file")
-
+
     img_md5 = get_md5(np.array(image))
     audio_md5 = get_md5(audio[1])
     print(f"Processing with image hash: {img_md5}, audio hash: {audio_md5}")
-
+
     sampling_rate, arr = audio[:2]
     if len(arr.shape) == 1:
         arr = arr[:, None]
-
-    # Create audio segment
+
+    # Create the audio segment
     audio_segment = AudioSegment(
         arr.tobytes(),
         frame_rate=sampling_rate,
@@ -83,19 +83,19 @@ def process_sonic(image, audio, dynamic_scale):
         channels=arr.shape[1]
     )
     audio_segment = audio_segment.set_frame_rate(sampling_rate)
-
-    # Generate paths
+
+    # Build the file paths
     image_path = os.path.abspath(os.path.join(tmp_path, f'{img_md5}.png'))
     audio_path = os.path.abspath(os.path.join(tmp_path, f'{audio_md5}.wav'))
     res_video_path = os.path.abspath(os.path.join(res_path, f'{img_md5}_{audio_md5}_{dynamic_scale}.mp4'))
-
-    # Save inputs if they don't exist
+
+    # Save the inputs if they do not already exist
     if not os.path.exists(image_path):
         image.save(image_path)
     if not os.path.exists(audio_path):
         audio_segment.export(audio_path, format="wav")
-
-    # Process or return cached result
+
+    # Return the cached result if it exists, otherwise generate a new video
     if os.path.exists(res_video_path):
         print(f"Using cached result: {res_video_path}")
         return res_video_path
@@ -103,12 +103,10 @@ def process_sonic(image, audio, dynamic_scale):
     print(f"Generating new video with dynamic scale: {dynamic_scale}")
     return get_video_res(image_path, audio_path, res_video_path, dynamic_scale)
 
-# Dummy get_example function to prevent errors if examples are not defined
+# Dummy function for example data (add real example data here as needed)
 def get_example():
-    # If there are no examples, return an empty list or provide real example data here.
     return []
 
-# Enhanced UI
 css = """
 .gradio-container {
     font-family: 'Arial', sans-serif;
@@ -136,7 +134,7 @@ with gr.Blocks(css=css) as demo:
         <p>Transform still images into dynamic videos synchronized with audio</p>
     </div>
     """)
-
+
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(
@@ -144,13 +142,13 @@ with gr.Blocks(css=css) as demo:
                 label="Portrait Image",
                 elem_id="image_input"
             )
-
+
            audio_input = gr.Audio(
                label="Voice/Audio Input",
                elem_id="audio_input",
                type="numpy"
            )
-
+
        with gr.Column():
            dynamic_scale = gr.Slider(
                minimum=0.5,
@@ -160,28 +158,28 @@ with gr.Blocks(css=css) as demo:
                label="Animation Intensity",
                info="Adjust to control movement intensity (0.5: subtle, 2.0: dramatic)"
            )
-
+
            process_btn = gr.Button(
                "Generate Animation",
                variant="primary",
                elem_id="process_btn"
            )
-
+
        with gr.Column():
            video_output = gr.Video(
                label="Generated Animation",
                elem_id="video_output"
            )
-
-    # Process button click
+
+    # Call the animation function when the button is clicked
    process_btn.click(
        fn=process_sonic,
        inputs=[image_input, audio_input, dynamic_scale],
        outputs=video_output,
        api_name="animate"
    )
-
-    # Examples section (elem_classes argument removed)
+
+    # Examples section
    gr.Examples(
        examples=get_example(),
        fn=process_sonic,
@@ -189,8 +187,8 @@ with gr.Blocks(css=css) as demo:
        outputs=video_output,
        cache_examples=False
    )
-
-    # Footer with attribution and links
+
+    # Footer: attribution and links
    gr.HTML("""
    <div style="text-align: center; margin-top: 2em;">
        <div style="margin-bottom: 1em;">
@@ -205,4 +203,5 @@ with gr.Blocks(css=css) as demo:
    </div>
    """)
 
-demo.launch()
+# Launch with a public share link
+demo.launch(share=True)