Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -290,16 +290,16 @@ def preset_changed(preset):
|
|
290 |
]
|
291 |
|
292 |
def generate_video_from_text(
|
293 |
-
prompt
|
294 |
-
enhance_prompt_toggle
|
295 |
-
negative_prompt
|
296 |
-
frame_rate
|
297 |
-
seed
|
298 |
-
num_inference_steps
|
299 |
-
guidance_scale
|
300 |
-
height
|
301 |
-
width
|
302 |
-
num_frames
|
303 |
progress=gr.Progress(),
|
304 |
):
|
305 |
if len(prompt.strip()) < 50:
|
@@ -308,10 +308,23 @@ def generate_video_from_text(
|
|
308 |
duration=5,
|
309 |
)
|
310 |
|
|
|
|
|
|
|
|
|
311 |
# Translate Korean prompts to English
|
312 |
prompt = translate_korean_prompt(prompt)
|
313 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
sample = {
|
316 |
"prompt": prompt,
|
317 |
"prompt_attention_mask": None,
|
@@ -354,7 +367,6 @@ def generate_video_from_text(
|
|
354 |
gc.collect()
|
355 |
|
356 |
output_path = tempfile.mktemp(suffix=".mp4")
|
357 |
-
print(images.shape)
|
358 |
video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
|
359 |
video_np = (video_np * 255).astype(np.uint8)
|
360 |
height, width = video_np.shape[1:3]
|
@@ -371,21 +383,20 @@ def generate_video_from_text(
|
|
371 |
|
372 |
def generate_video_from_image(
|
373 |
image_path,
|
374 |
-
prompt
|
375 |
-
enhance_prompt_toggle
|
376 |
-
negative_prompt
|
377 |
-
frame_rate
|
378 |
-
seed
|
379 |
-
num_inference_steps
|
380 |
-
guidance_scale
|
381 |
-
height
|
382 |
-
width
|
383 |
-
num_frames
|
384 |
progress=gr.Progress(),
|
385 |
):
|
386 |
-
|
387 |
-
|
388 |
-
print("Num Frames: ", num_frames)
|
389 |
|
390 |
if len(prompt.strip()) < 50:
|
391 |
raise gr.Error(
|
@@ -393,13 +404,24 @@ def generate_video_from_image(
|
|
393 |
duration=5,
|
394 |
)
|
395 |
|
396 |
-
|
397 |
-
|
|
|
398 |
|
399 |
# Translate Korean prompts to English
|
400 |
prompt = translate_korean_prompt(prompt)
|
401 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
media_items = (
|
404 |
load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
|
405 |
)
|
@@ -447,6 +469,7 @@ def generate_video_from_image(
|
|
447 |
for frame in video_np[..., ::-1]:
|
448 |
out.write(frame)
|
449 |
out.release()
|
|
|
450 |
except Exception as e:
|
451 |
raise gr.Error(
|
452 |
f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
|
@@ -456,6 +479,12 @@ def generate_video_from_image(
|
|
456 |
finally:
|
457 |
torch.cuda.empty_cache()
|
458 |
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
|
460 |
return output_path
|
461 |
|
@@ -813,7 +842,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
|
|
813 |
lines=5,
|
814 |
)
|
815 |
txt2vid_enhance_toggle = Toggle(
|
816 |
-
label="프롬프트
|
817 |
value=False,
|
818 |
interactive=True,
|
819 |
)
|
@@ -1025,35 +1054,46 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
|
|
1025 |
outputs=txt2vid_prompt
|
1026 |
)
|
1027 |
|
|
|
1028 |
txt2vid_generate.click(
|
1029 |
fn=generate_video_from_text,
|
1030 |
inputs=[
|
1031 |
-
txt2vid_prompt,
|
1032 |
-
txt2vid_enhance_toggle,
|
1033 |
-
txt2vid_negative_prompt,
|
1034 |
-
txt2vid_frame_rate,
|
1035 |
-
|
1036 |
-
|
1037 |
-
|
1038 |
-
|
|
|
|
|
1039 |
],
|
1040 |
outputs=txt2vid_output,
|
1041 |
-
|
1042 |
-
concurrency_id="generate_video",
|
1043 |
-
queue=True,
|
1044 |
)
|
1045 |
|
1046 |
-
|
1047 |
-
|
1048 |
-
fn=
|
1049 |
-
inputs=[
|
1050 |
-
|
1051 |
-
|
1052 |
-
|
1053 |
-
|
1054 |
-
|
1055 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1056 |
)
|
|
|
|
|
1057 |
|
1058 |
img2vid_enhance_toggle.change(
|
1059 |
fn=update_prompt_i2v,
|
|
|
290 |
]
|
291 |
|
292 |
def generate_video_from_text(
|
293 |
+
prompt,
|
294 |
+
enhance_prompt_toggle,
|
295 |
+
negative_prompt,
|
296 |
+
frame_rate,
|
297 |
+
seed,
|
298 |
+
num_inference_steps,
|
299 |
+
guidance_scale,
|
300 |
+
height,
|
301 |
+
width,
|
302 |
+
num_frames,
|
303 |
progress=gr.Progress(),
|
304 |
):
|
305 |
if len(prompt.strip()) < 50:
|
|
|
308 |
duration=5,
|
309 |
)
|
310 |
|
311 |
+
# 프롬프트 개선이 활성화된 경우
|
312 |
+
if enhance_prompt_toggle:
|
313 |
+
prompt = enhance_prompt(prompt, "t2v")
|
314 |
+
|
315 |
# Translate Korean prompts to English
|
316 |
prompt = translate_korean_prompt(prompt)
|
317 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
318 |
|
319 |
+
# 기본값 설정
|
320 |
+
height = height or 320
|
321 |
+
width = width or 512
|
322 |
+
num_frames = num_frames or 257
|
323 |
+
frame_rate = frame_rate or 25
|
324 |
+
seed = seed or 171198
|
325 |
+
num_inference_steps = num_inference_steps or 41
|
326 |
+
guidance_scale = guidance_scale or 4.0
|
327 |
+
|
328 |
sample = {
|
329 |
"prompt": prompt,
|
330 |
"prompt_attention_mask": None,
|
|
|
367 |
gc.collect()
|
368 |
|
369 |
output_path = tempfile.mktemp(suffix=".mp4")
|
|
|
370 |
video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
|
371 |
video_np = (video_np * 255).astype(np.uint8)
|
372 |
height, width = video_np.shape[1:3]
|
|
|
383 |
|
384 |
def generate_video_from_image(
|
385 |
image_path,
|
386 |
+
prompt,
|
387 |
+
enhance_prompt_toggle,
|
388 |
+
negative_prompt,
|
389 |
+
frame_rate,
|
390 |
+
seed,
|
391 |
+
num_inference_steps,
|
392 |
+
guidance_scale,
|
393 |
+
height,
|
394 |
+
width,
|
395 |
+
num_frames,
|
396 |
progress=gr.Progress(),
|
397 |
):
|
398 |
+
if not image_path:
|
399 |
+
raise gr.Error("입력 이미지를 제공해주세요.", duration=5)
|
|
|
400 |
|
401 |
if len(prompt.strip()) < 50:
|
402 |
raise gr.Error(
|
|
|
404 |
duration=5,
|
405 |
)
|
406 |
|
407 |
+
# 프롬프트 개선이 활성화된 경우
|
408 |
+
if enhance_prompt_toggle:
|
409 |
+
prompt = enhance_prompt(prompt, "i2v")
|
410 |
|
411 |
# Translate Korean prompts to English
|
412 |
prompt = translate_korean_prompt(prompt)
|
413 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
414 |
|
415 |
+
# 기본값 설정
|
416 |
+
height = height or 320
|
417 |
+
width = width or 512
|
418 |
+
num_frames = num_frames or 257
|
419 |
+
frame_rate = frame_rate or 25
|
420 |
+
seed = seed or 171198
|
421 |
+
num_inference_steps = num_inference_steps or 41
|
422 |
+
guidance_scale = guidance_scale or 4.0
|
423 |
+
|
424 |
+
# 이미지 로드 및 전처리
|
425 |
media_items = (
|
426 |
load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
|
427 |
)
|
|
|
469 |
for frame in video_np[..., ::-1]:
|
470 |
out.write(frame)
|
471 |
out.release()
|
472 |
+
|
473 |
except Exception as e:
|
474 |
raise gr.Error(
|
475 |
f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
|
|
|
479 |
finally:
|
480 |
torch.cuda.empty_cache()
|
481 |
gc.collect()
|
482 |
+
if 'images' in locals():
|
483 |
+
del images
|
484 |
+
if 'video_np' in locals():
|
485 |
+
del video_np
|
486 |
+
if 'media_items' in locals():
|
487 |
+
del media_items
|
488 |
|
489 |
return output_path
|
490 |
|
|
|
842 |
lines=5,
|
843 |
)
|
844 |
txt2vid_enhance_toggle = Toggle(
|
845 |
+
label="프롬프트 증강",
|
846 |
value=False,
|
847 |
interactive=True,
|
848 |
)
|
|
|
1054 |
outputs=txt2vid_prompt
|
1055 |
)
|
1056 |
|
1057 |
+
# Event handlers 부분 수정
|
1058 |
txt2vid_generate.click(
|
1059 |
fn=generate_video_from_text,
|
1060 |
inputs=[
|
1061 |
+
txt2vid_prompt, # 텍스트 입력
|
1062 |
+
txt2vid_enhance_toggle, # 프롬프트 개선 토글
|
1063 |
+
txt2vid_negative_prompt, # 네거티브 프롬프트
|
1064 |
+
txt2vid_frame_rate, # 프레임 레이트
|
1065 |
+
txt2vid_advanced[0], # seed
|
1066 |
+
txt2vid_advanced[1], # inference_steps
|
1067 |
+
txt2vid_advanced[2], # guidance_scale
|
1068 |
+
height_slider, # height
|
1069 |
+
width_slider, # width
|
1070 |
+
num_frames_slider, # num_frames
|
1071 |
],
|
1072 |
outputs=txt2vid_output,
|
1073 |
+
api_name="generate_text_to_video"
|
|
|
|
|
1074 |
)
|
1075 |
|
1076 |
+
# Image to Video 이벤트 핸들러 수정
|
1077 |
+
img2vid_generate.click(
|
1078 |
+
fn=generate_video_from_image,
|
1079 |
+
inputs=[
|
1080 |
+
img2vid_image, # 입력 이미지
|
1081 |
+
img2vid_prompt, # 텍스트 입력
|
1082 |
+
img2vid_enhance_toggle, # 프롬프트 개선 토글
|
1083 |
+
img2vid_negative_prompt, # 네거티브 프롬프트
|
1084 |
+
img2vid_frame_rate, # 프레임 레이트
|
1085 |
+
img2vid_advanced[0], # seed
|
1086 |
+
img2vid_advanced[1], # inference_steps
|
1087 |
+
img2vid_advanced[2], # guidance_scale
|
1088 |
+
height_slider, # height
|
1089 |
+
width_slider, # width
|
1090 |
+
num_frames_slider, # num_frames
|
1091 |
+
],
|
1092 |
+
outputs=img2vid_output,
|
1093 |
+
api_name="generate_image_to_video"
|
1094 |
)
|
1095 |
+
|
1096 |
+
|
1097 |
|
1098 |
img2vid_enhance_toggle.change(
|
1099 |
fn=update_prompt_i2v,
|