openfree committed
Commit 317219d · verified · Parent: 137ab16

Update app.py

Files changed (1)
app.py: +31 −27
app.py CHANGED
@@ -27,26 +27,26 @@ def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
     expand_ratio = 0.5
     min_resolution = 512
     inference_steps = 25
-
+
     # Get audio duration
     audio = AudioSegment.from_file(audio_path)
     duration = len(audio) / 1000.0  # Convert to seconds
-
+
     face_info = pipe.preprocess(img_path, expand_ratio=expand_ratio)
     print(f"Face detection info: {face_info}")
     print(f"Audio duration: {duration} seconds")
-
+
     if face_info['face_num'] > 0:
         crop_image_path = img_path + '.crop.png'
         pipe.crop_image(img_path, crop_image_path, face_info['crop_bbox'])
         img_path = crop_image_path
     os.makedirs(os.path.dirname(res_video_path), exist_ok=True)
-
+
     # Process with full audio duration
     pipe.process(
-        img_path,
-        audio_path,
-        res_video_path,
+        img_path,
+        audio_path,
+        res_video_path,
         min_resolution=min_resolution,
         inference_steps=inference_steps,
         dynamic_scale=dynamic_scale,
@@ -57,8 +57,8 @@ def get_video_res(img_path, audio_path, res_video_path, dynamic_scale=1.0):
 
 tmp_path = './tmp_path/'
 res_path = './res_path/'
-os.makedirs(tmp_path, exist_ok=1)
-os.makedirs(res_path, exist_ok=1)
+os.makedirs(tmp_path, exist_ok=True)
+os.makedirs(res_path, exist_ok=True)
 
 def process_sonic(image, audio, dynamic_scale):
     # Input validation
@@ -66,15 +66,15 @@ def process_sonic(image, audio, dynamic_scale):
         raise gr.Error("Please upload an image")
     if audio is None:
         raise gr.Error("Please upload an audio file")
-
+
     img_md5 = get_md5(np.array(image))
     audio_md5 = get_md5(audio[1])
     print(f"Processing with image hash: {img_md5}, audio hash: {audio_md5}")
-
+
     sampling_rate, arr = audio[:2]
     if len(arr.shape) == 1:
         arr = arr[:, None]
-
+
     # Create audio segment
     audio_segment = AudioSegment(
         arr.tobytes(),
@@ -83,18 +83,18 @@ def process_sonic(image, audio, dynamic_scale):
         channels=arr.shape[1]
     )
     audio_segment = audio_segment.set_frame_rate(sampling_rate)
-
+
     # Generate paths
     image_path = os.path.abspath(os.path.join(tmp_path, f'{img_md5}.png'))
     audio_path = os.path.abspath(os.path.join(tmp_path, f'{audio_md5}.wav'))
     res_video_path = os.path.abspath(os.path.join(res_path, f'{img_md5}_{audio_md5}_{dynamic_scale}.mp4'))
-
+
     # Save inputs if they don't exist
     if not os.path.exists(image_path):
         image.save(image_path)
     if not os.path.exists(audio_path):
         audio_segment.export(audio_path, format="wav")
-
+
     # Process or return cached result
     if os.path.exists(res_video_path):
         print(f"Using cached result: {res_video_path}")
@@ -103,6 +103,11 @@ def process_sonic(image, audio, dynamic_scale):
     print(f"Generating new video with dynamic scale: {dynamic_scale}")
     return get_video_res(image_path, audio_path, res_video_path, dynamic_scale)
 
+# Dummy get_example function to prevent errors if examples are not defined
+def get_example():
+    # If no examples exist, return an empty list; real example data can be filled in here.
+    return []
+
 # Enhanced UI
 css = """
 .gradio-container {
@@ -131,22 +136,21 @@ with gr.Blocks(css=css) as demo:
         <p>Transform still images into dynamic videos synchronized with audio</p>
     </div>
     """)
-
+
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(
                 type='pil',
                 label="Portrait Image",
-                elem_id="image_input",
-                tool="select"
+                elem_id="image_input"
             )
-
+
             audio_input = gr.Audio(
                 label="Voice/Audio Input",
                 elem_id="audio_input",
                 type="numpy"
             )
-
+
             with gr.Box(elem_classes="parameter-section"):
                 dynamic_scale = gr.Slider(
                     minimum=0.5,
@@ -156,19 +160,19 @@ with gr.Blocks(css=css) as demo:
                     label="Animation Intensity",
                     info="Adjust to control movement intensity (0.5: subtle, 2.0: dramatic)"
                 )
-
+
             process_btn = gr.Button(
-                "Generate Animation",
+                "Generate Animation",
                 variant="primary",
                 elem_id="process_btn"
             )
-
+
         with gr.Column():
             video_output = gr.Video(
                 label="Generated Animation",
                 elem_id="video_output"
            )
-
+
    # Process button click
    process_btn.click(
        fn=process_sonic,
@@ -176,7 +180,7 @@ with gr.Blocks(css=css) as demo:
        outputs=video_output,
        api_name="animate"
    )
-
+
    # Examples section
    gr.Examples(
        examples=get_example(),
@@ -186,7 +190,7 @@ with gr.Blocks(css=css) as demo:
        cache_examples=False,
        elem_classes="example-section"
    )
-
+
    # Footer with attribution and links
    gr.HTML("""
        <div style="text-align: center; margin-top: 2em;">
@@ -202,4 +206,4 @@ with gr.Blocks(css=css) as demo:
        </div>
    """)
 
-demo.launch()
+demo.launch()
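Note on the caching scheme: every temporary file and result above is keyed on MD5 digests of the raw inputs. get_md5 is defined elsewhere in app.py and is not part of this diff; a minimal sketch of what such a helper plausibly looks like, assuming it hashes raw bytes:

# Hypothetical sketch of the get_md5 helper; the real definition is not shown in this diff.
import hashlib

import numpy as np

def get_md5(data):
    # Hash the raw buffer of a numpy array, or a bytes-like object directly.
    if isinstance(data, np.ndarray):
        data = data.tobytes()
    return hashlib.md5(data).hexdigest()

Because res_video_path embeds img_md5, audio_md5, and dynamic_scale, identical inputs at the same intensity resolve to the same file name and reuse the cached MP4 instead of re-running inference.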
 
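process_sonic receives Gradio's type="numpy" audio as a (sampling_rate, array) tuple and rebuilds it as a pydub AudioSegment before exporting WAV. A self-contained sketch of that conversion; the sample_width argument is an assumption, since the constructor's remaining arguments fall outside the diff context:

# Standalone sketch of the numpy-tuple-to-WAV conversion done in process_sonic.
import numpy as np
from pydub import AudioSegment

def numpy_audio_to_wav(sampling_rate, arr, out_path):
    if arr.ndim == 1:
        arr = arr[:, None]  # promote mono to shape (samples, 1), as in the diff
    segment = AudioSegment(
        arr.tobytes(),
        frame_rate=sampling_rate,
        sample_width=arr.dtype.itemsize,  # bytes per sample, e.g. 2 for int16 (assumption)
        channels=arr.shape[1],
    )
    segment.export(out_path, format="wav")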
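The process-or-return-cached branch at the end of process_sonic is a plain memoize-to-disk pattern. A generic sketch, assuming a generate callable that writes its output to out_path:

import os

def cached_generate(out_path, generate):
    # Reuse a previously rendered file when its deterministic path already exists.
    if os.path.exists(out_path):
        print(f"Using cached result: {out_path}")
        return out_path
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
    return generate(out_path)

One caveat of this pattern: an interrupted run can leave a partial file that later passes the os.path.exists check, so production code often writes to a temporary name and renames on success.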
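Since the click handler registers api_name="animate", the same endpoint is callable programmatically through the Gradio client. A usage sketch, assuming gradio_client >= 1.0; the Space id and file names are placeholders, not values from this page:

# Hypothetical client call; "user/sonic-space" is a placeholder Space id.
from gradio_client import Client, handle_file

client = Client("user/sonic-space")
result = client.predict(
    handle_file("portrait.png"),  # image_input
    handle_file("voice.wav"),     # audio_input
    1.0,                          # dynamic_scale
    api_name="/animate",
)
print(result)  # local path to the generated MP4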