yerang committed on
Commit 2c29dd0 · verified · 1 Parent(s): f5dae9a

Update app.py

Files changed (1)
  1. app.py +224 -224
app.py CHANGED
@@ -22,7 +22,7 @@ from elevenlabs_utils import ElevenLabsPipeline
  from setup_environment import initialize_environment
  from src.utils.video import extract_audio
  #from flux_dev import create_flux_tab
- from flux_schnell import create_flux_tab
  # from diffusers import FluxPipeline

  # import gdown
@@ -133,267 +133,267 @@ from stf_utils import STFPipeline
  # return stf_pipeline.execute(audio_path)


- # ###### 테스트중 ######


- # stf_pipeline = STFPipeline()
- # driving_video_path=gr.Video()

- # # set tyro theme
- # tyro.extras.set_accent_color("bright_cyan")
- # args = tyro.cli(ArgumentConfig)

- # with gr.Blocks(theme=gr.themes.Soft()) as demo:
- # with gr.Row():
- # audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
- # stf_button = gr.Button("stf test", variant="primary")
- # stf_button.click(
- # fn=gpu_wrapped_stf_pipeline_execute,
- # inputs=[
- # audio_path_component
- # ],
- # outputs=[driving_video_path]
- # )
- # with gr.Row():
- # driving_video_path.render()

- # with gr.Row():
- # create_flux_tab() # image_input을 flux_tab에 전달합니다.

- # ###### 테스트중 ######


- def partial_fields(target_class, kwargs):
- return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

- # set tyro theme
- tyro.extras.set_accent_color("bright_cyan")
- args = tyro.cli(ArgumentConfig)

- # specify configs for inference
- inference_cfg = partial_fields(InferenceConfig, args.__dict__) # use attribute of args to initial InferenceConfig
- crop_cfg = partial_fields(CropConfig, args.__dict__) # use attribute of args to initial CropConfig

- gradio_pipeline = GradioPipeline(
- inference_cfg=inference_cfg,
- crop_cfg=crop_cfg,
- args=args
- )

- # 추가 정의
- elevenlabs_pipeline = ElevenLabsPipeline()
- stf_pipeline = STFPipeline()


- @spaces.GPU() #duration=240)
- def gpu_wrapped_stf_pipeline_execute(audio_path):
- return stf_pipeline.execute(audio_path)


- @spaces.GPU()
- def gpu_wrapped_elevenlabs_pipeline_generate_voice(text, voice):
- return elevenlabs_pipeline.generate_voice(text, voice)



- @spaces.GPU()
- def gpu_wrapped_execute_video(*args, **kwargs):
- return gradio_pipeline.execute_video(*args, **kwargs)

- @spaces.GPU()
- def gpu_wrapped_execute_image(*args, **kwargs):
- return gradio_pipeline.execute_image(*args, **kwargs)

- def is_square_video(video_path):
- video = cv2.VideoCapture(video_path)

- width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
- height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

- video.release()
- if width != height:
- raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")

- return gr.update(visible=True)

- def txt_to_driving_video(text):
- audio_path = gpu_wrapped_elevenlabs_pipeline_generate_voice(text)
- driving_video_path = gpu_wrapped_stf_pipeline_execute(audio_path)
- return driving_video_path

- # assets
- title_md = "assets/gradio_title.md"
- example_portrait_dir = "assets/examples/source"
- example_portrait_dir_custom = "assets/examples/source"
- example_video_dir = "assets/examples/driving"
- data_examples = [
- [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
- [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
- [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
- ]
- #################### interface logic ####################

- # Define components first
- eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
- lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
- retargeting_input_image = gr.Image(type="filepath")
- output_image = gr.Image(type="numpy")
- output_image_paste_back = gr.Image(type="numpy")
- output_video = gr.Video()
- output_video_concat = gr.Video()

- # video_input = gr.Video()
- driving_video_path=gr.Video()


- with gr.Blocks(theme=gr.themes.Soft()) as demo:
- #gr.HTML(load_description(title_md))
-
- with gr.Tabs():
- with gr.Tab("Text to LipSync"):
- gr.Markdown("# Text to LipSync")
- with gr.Row():
- with gr.Column():
- script_txt = gr.Text()
- # with gr.Column():
- # txt2video_gen_button = gr.Button("txt2video generation", variant="primary")
-
- with gr.Column():
- audio_gen_button = gr.Button("Audio generation", variant="primary")
- with gr.Row():
- output_audio = gr.Audio(label="Generated audio", type="filepath")
- with gr.Row():
- video_gen_button = gr.Button("Audio to Video generation", variant="primary")



- gr.Markdown(load_description("assets/gradio_description_upload.md"))
- with gr.Row():
- with gr.Accordion(open=True, label="Source Portrait"):
- image_input = gr.Image(type="filepath")
- gr.Examples(
- examples=[
- [osp.join(example_portrait_dir, "01.webp")],
- [osp.join(example_portrait_dir, "02.webp")],
- [osp.join(example_portrait_dir, "03.jpg")],
- [osp.join(example_portrait_dir, "04.jpg")],
- [osp.join(example_portrait_dir, "05.jpg")],
- [osp.join(example_portrait_dir, "06.jpg")],
- [osp.join(example_portrait_dir, "07.jpg")],
- [osp.join(example_portrait_dir, "08.jpg")],
- ],
- inputs=[image_input],
- cache_examples=False,
- )
- with gr.Accordion(open=True, label="Driving Video"):
- video_input = gr.Video()
- gr.Examples(
- examples=[
- [osp.join(example_video_dir, "d0.mp4")],
- [osp.join(example_video_dir, "d18.mp4")],
- [osp.join(example_video_dir, "d19.mp4")],
- [osp.join(example_video_dir, "d14_trim.mp4")],
- [osp.join(example_video_dir, "d6_trim.mp4")],
- ],
- inputs=[video_input],
- cache_examples=False,
- )
- with gr.Row():
- with gr.Accordion(open=False, label="Animation Instructions and Options"):
- gr.Markdown(load_description("assets/gradio_description_animation.md"))
- with gr.Row():
- flag_relative_input = gr.Checkbox(value=True, label="relative motion")
- flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
- flag_remap_input = gr.Checkbox(value=True, label="paste-back")
- gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
- with gr.Row():
- with gr.Column():
- process_button_animation = gr.Button("🚀 Animate", variant="primary")
- with gr.Column():
- process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
- with gr.Row():
- with gr.Column():
- with gr.Accordion(open=True, label="The animated video in the original image space"):
- output_video.render()
- with gr.Column():
- with gr.Accordion(open=True, label="The animated video"):
- output_video_concat.render()
- with gr.Row():
- # Examples
- gr.Markdown("## You could also choose the examples below by one click ⬇️")
- with gr.Row():
- gr.Examples(
- examples=data_examples,
- fn=gpu_wrapped_execute_video,
- inputs=[
- image_input,
- video_input,
- flag_relative_input,
- flag_do_crop_input,
- flag_remap_input
- ],
- outputs=[output_image, output_image_paste_back],
- examples_per_page=6,
- cache_examples=False,
- )

- process_button_animation.click(
- fn=gpu_wrapped_execute_video,
- inputs=[
- image_input,
- video_input,
- flag_relative_input,
- flag_do_crop_input,
- flag_remap_input
- ],
- outputs=[output_video, output_video_concat],
- show_progress=True
- )
- # txt2video_gen_button.click(
- # fn=txt_to_driving_video,
- # inputs=[
- # script_txt
- # ],
- # outputs=[video_input],
- # show_progress=True
- # )
- audio_gen_button.click(
- fn=gpu_wrapped_elevenlabs_pipeline_generate_voice,
- inputs=[
- script_txt
- ],
- outputs=[output_audio],
- show_progress=True
- )
-
- video_gen_button.click(
- fn=gpu_wrapped_stf_pipeline_execute,
- inputs=[
- output_audio
- ],
- outputs=[video_input],
- show_progress=True
- )



- # image_input.change(
- # fn=gradio_pipeline.prepare_retargeting,
- # inputs=image_input,
- # outputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image]
- # )
- video_input.upload(
- fn=is_square_video,
- inputs=video_input,
- outputs=video_input
- )

- # 세 번째 탭: Flux 개발용 탭
- with gr.Tab("FLUX Image"):
- flux_demo = create_flux_tab(image_input) # Flux 개발용 탭 생성

  demo.launch(
  server_port=args.server_port,
 
  from setup_environment import initialize_environment
  from src.utils.video import extract_audio
  #from flux_dev import create_flux_tab
+ # from flux_schnell import create_flux_tab
  # from diffusers import FluxPipeline

  # import gdown

  # return stf_pipeline.execute(audio_path)


+ ###### 테스트중 ######


+ stf_pipeline = STFPipeline()
+ driving_video_path=gr.Video()

+ # set tyro theme
+ tyro.extras.set_accent_color("bright_cyan")
+ args = tyro.cli(ArgumentConfig)

+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+ with gr.Row():
+ audio_path_component = gr.Textbox(label="Input", value="assets/examples/driving/test_aud.mp3")
+ stf_button = gr.Button("stf test", variant="primary")
+ stf_button.click(
+ fn=gpu_wrapped_stf_pipeline_execute,
+ inputs=[
+ audio_path_component
+ ],
+ outputs=[driving_video_path]
+ )
+ with gr.Row():
+ driving_video_path.render()

+ # with gr.Row():
+ # create_flux_tab() # image_input을 flux_tab에 전달합니다.

+ ###### 테스트중 ######


+ # def partial_fields(target_class, kwargs):
+ # return target_class(**{k: v for k, v in kwargs.items() if hasattr(target_class, k)})

+ # # set tyro theme
+ # tyro.extras.set_accent_color("bright_cyan")
+ # args = tyro.cli(ArgumentConfig)

+ # # specify configs for inference
+ # inference_cfg = partial_fields(InferenceConfig, args.__dict__) # use attribute of args to initial InferenceConfig
+ # crop_cfg = partial_fields(CropConfig, args.__dict__) # use attribute of args to initial CropConfig

+ # gradio_pipeline = GradioPipeline(
+ # inference_cfg=inference_cfg,
+ # crop_cfg=crop_cfg,
+ # args=args
+ # )

+ # # 추가 정의
+ # elevenlabs_pipeline = ElevenLabsPipeline()
+ # stf_pipeline = STFPipeline()


+ # @spaces.GPU() #duration=240)
+ # def gpu_wrapped_stf_pipeline_execute(audio_path):
+ # return stf_pipeline.execute(audio_path)


+ # @spaces.GPU()
+ # def gpu_wrapped_elevenlabs_pipeline_generate_voice(text, voice):
+ # return elevenlabs_pipeline.generate_voice(text, voice)



+ # @spaces.GPU()
+ # def gpu_wrapped_execute_video(*args, **kwargs):
+ # return gradio_pipeline.execute_video(*args, **kwargs)

+ # @spaces.GPU()
+ # def gpu_wrapped_execute_image(*args, **kwargs):
+ # return gradio_pipeline.execute_image(*args, **kwargs)

+ # def is_square_video(video_path):
+ # video = cv2.VideoCapture(video_path)

+ # width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+ # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

+ # video.release()
+ # if width != height:
+ # raise gr.Error("Error: the video does not have a square aspect ratio. We currently only support square videos")

+ # return gr.update(visible=True)

+ # def txt_to_driving_video(text):
+ # audio_path = gpu_wrapped_elevenlabs_pipeline_generate_voice(text)
+ # driving_video_path = gpu_wrapped_stf_pipeline_execute(audio_path)
+ # return driving_video_path

+ # # assets
+ # title_md = "assets/gradio_title.md"
+ # example_portrait_dir = "assets/examples/source"
+ # example_portrait_dir_custom = "assets/examples/source"
+ # example_video_dir = "assets/examples/driving"
+ # data_examples = [
+ # [osp.join(example_portrait_dir, "s9.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+ # [osp.join(example_portrait_dir, "s6.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+ # [osp.join(example_portrait_dir, "s10.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+ # [osp.join(example_portrait_dir, "s5.jpg"), osp.join(example_video_dir, "d18.mp4"), True, True, True, True],
+ # [osp.join(example_portrait_dir, "s7.jpg"), osp.join(example_video_dir, "d19.mp4"), True, True, True, True],
+ # [osp.join(example_portrait_dir, "s22.jpg"), osp.join(example_video_dir, "d0.mp4"), True, True, True, True],
+ # ]
+ # #################### interface logic ####################

+ # # Define components first
+ # eye_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target eyes-open ratio")
+ # lip_retargeting_slider = gr.Slider(minimum=0, maximum=0.8, step=0.01, label="target lip-open ratio")
+ # retargeting_input_image = gr.Image(type="filepath")
+ # output_image = gr.Image(type="numpy")
+ # output_image_paste_back = gr.Image(type="numpy")
+ # output_video = gr.Video()
+ # output_video_concat = gr.Video()

+ # # video_input = gr.Video()
+ # driving_video_path=gr.Video()


+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
+ # #gr.HTML(load_description(title_md))
+
+ # with gr.Tabs():
+ # with gr.Tab("Text to LipSync"):
+ # gr.Markdown("# Text to LipSync")
+ # with gr.Row():
+ # with gr.Column():
+ # script_txt = gr.Text()
+ # # with gr.Column():
+ # # txt2video_gen_button = gr.Button("txt2video generation", variant="primary")
+
+ # with gr.Column():
+ # audio_gen_button = gr.Button("Audio generation", variant="primary")
+ # with gr.Row():
+ # output_audio = gr.Audio(label="Generated audio", type="filepath")
+ # with gr.Row():
+ # video_gen_button = gr.Button("Audio to Video generation", variant="primary")



+ # gr.Markdown(load_description("assets/gradio_description_upload.md"))
+ # with gr.Row():
+ # with gr.Accordion(open=True, label="Source Portrait"):
+ # image_input = gr.Image(type="filepath")
+ # gr.Examples(
+ # examples=[
+ # [osp.join(example_portrait_dir, "01.webp")],
+ # [osp.join(example_portrait_dir, "02.webp")],
+ # [osp.join(example_portrait_dir, "03.jpg")],
+ # [osp.join(example_portrait_dir, "04.jpg")],
+ # [osp.join(example_portrait_dir, "05.jpg")],
+ # [osp.join(example_portrait_dir, "06.jpg")],
+ # [osp.join(example_portrait_dir, "07.jpg")],
+ # [osp.join(example_portrait_dir, "08.jpg")],
+ # ],
+ # inputs=[image_input],
+ # cache_examples=False,
+ # )
+ # with gr.Accordion(open=True, label="Driving Video"):
+ # video_input = gr.Video()
+ # gr.Examples(
+ # examples=[
+ # [osp.join(example_video_dir, "d0.mp4")],
+ # [osp.join(example_video_dir, "d18.mp4")],
+ # [osp.join(example_video_dir, "d19.mp4")],
+ # [osp.join(example_video_dir, "d14_trim.mp4")],
+ # [osp.join(example_video_dir, "d6_trim.mp4")],
+ # ],
+ # inputs=[video_input],
+ # cache_examples=False,
+ # )
+ # with gr.Row():
+ # with gr.Accordion(open=False, label="Animation Instructions and Options"):
+ # gr.Markdown(load_description("assets/gradio_description_animation.md"))
+ # with gr.Row():
+ # flag_relative_input = gr.Checkbox(value=True, label="relative motion")
+ # flag_do_crop_input = gr.Checkbox(value=True, label="do crop")
+ # flag_remap_input = gr.Checkbox(value=True, label="paste-back")
+ # gr.Markdown(load_description("assets/gradio_description_animate_clear.md"))
+ # with gr.Row():
+ # with gr.Column():
+ # process_button_animation = gr.Button("🚀 Animate", variant="primary")
+ # with gr.Column():
+ # process_button_reset = gr.ClearButton([image_input, video_input, output_video, output_video_concat], value="🧹 Clear")
+ # with gr.Row():
+ # with gr.Column():
+ # with gr.Accordion(open=True, label="The animated video in the original image space"):
+ # output_video.render()
+ # with gr.Column():
+ # with gr.Accordion(open=True, label="The animated video"):
+ # output_video_concat.render()
+ # with gr.Row():
+ # # Examples
+ # gr.Markdown("## You could also choose the examples below by one click ⬇️")
+ # with gr.Row():
+ # gr.Examples(
+ # examples=data_examples,
+ # fn=gpu_wrapped_execute_video,
+ # inputs=[
+ # image_input,
+ # video_input,
+ # flag_relative_input,
+ # flag_do_crop_input,
+ # flag_remap_input
+ # ],
+ # outputs=[output_image, output_image_paste_back],
+ # examples_per_page=6,
+ # cache_examples=False,
+ # )

+ # process_button_animation.click(
+ # fn=gpu_wrapped_execute_video,
+ # inputs=[
+ # image_input,
+ # video_input,
+ # flag_relative_input,
+ # flag_do_crop_input,
+ # flag_remap_input
+ # ],
+ # outputs=[output_video, output_video_concat],
+ # show_progress=True
+ # )
+ # # txt2video_gen_button.click(
+ # # fn=txt_to_driving_video,
+ # # inputs=[
+ # # script_txt
+ # # ],
+ # # outputs=[video_input],
+ # # show_progress=True
+ # # )
+ # audio_gen_button.click(
+ # fn=gpu_wrapped_elevenlabs_pipeline_generate_voice,
+ # inputs=[
+ # script_txt
+ # ],
+ # outputs=[output_audio],
+ # show_progress=True
+ # )
+
+ # video_gen_button.click(
+ # fn=gpu_wrapped_stf_pipeline_execute,
+ # inputs=[
+ # output_audio
+ # ],
+ # outputs=[video_input],
+ # show_progress=True
+ # )



+ # # image_input.change(
+ # # fn=gradio_pipeline.prepare_retargeting,
+ # # inputs=image_input,
+ # # outputs=[eye_retargeting_slider, lip_retargeting_slider, retargeting_input_image]
+ # # )
+ # video_input.upload(
+ # fn=is_square_video,
+ # inputs=video_input,
+ # outputs=video_input
+ # )

+ # # 세 번째 탭: Flux 개발용 탭
+ # with gr.Tab("FLUX Image"):
+ # flux_demo = create_flux_tab(image_input) # Flux 개발용 탭 생성

  demo.launch(
  server_port=args.server_port,