Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -290,16 +290,16 @@ def preset_changed(preset):
|
|
| 290 |
]
|
| 291 |
|
| 292 |
def generate_video_from_text(
|
| 293 |
-
prompt
|
| 294 |
-
enhance_prompt_toggle
|
| 295 |
-
negative_prompt
|
| 296 |
-
frame_rate
|
| 297 |
-
seed
|
| 298 |
-
num_inference_steps
|
| 299 |
-
guidance_scale
|
| 300 |
-
height
|
| 301 |
-
width
|
| 302 |
-
num_frames
|
| 303 |
progress=gr.Progress(),
|
| 304 |
):
|
| 305 |
if len(prompt.strip()) < 50:
|
|
@@ -308,10 +308,23 @@ def generate_video_from_text(
|
|
| 308 |
duration=5,
|
| 309 |
)
|
| 310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
# Translate Korean prompts to English
|
| 312 |
prompt = translate_korean_prompt(prompt)
|
| 313 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
sample = {
|
| 316 |
"prompt": prompt,
|
| 317 |
"prompt_attention_mask": None,
|
|
@@ -354,7 +367,6 @@ def generate_video_from_text(
|
|
| 354 |
gc.collect()
|
| 355 |
|
| 356 |
output_path = tempfile.mktemp(suffix=".mp4")
|
| 357 |
-
print(images.shape)
|
| 358 |
video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
|
| 359 |
video_np = (video_np * 255).astype(np.uint8)
|
| 360 |
height, width = video_np.shape[1:3]
|
|
@@ -371,21 +383,20 @@ def generate_video_from_text(
|
|
| 371 |
|
| 372 |
def generate_video_from_image(
|
| 373 |
image_path,
|
| 374 |
-
prompt
|
| 375 |
-
enhance_prompt_toggle
|
| 376 |
-
negative_prompt
|
| 377 |
-
frame_rate
|
| 378 |
-
seed
|
| 379 |
-
num_inference_steps
|
| 380 |
-
guidance_scale
|
| 381 |
-
height
|
| 382 |
-
width
|
| 383 |
-
num_frames
|
| 384 |
progress=gr.Progress(),
|
| 385 |
):
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
print("Num Frames: ", num_frames)
|
| 389 |
|
| 390 |
if len(prompt.strip()) < 50:
|
| 391 |
raise gr.Error(
|
|
@@ -393,13 +404,24 @@ def generate_video_from_image(
|
|
| 393 |
duration=5,
|
| 394 |
)
|
| 395 |
|
| 396 |
-
|
| 397 |
-
|
|
|
|
| 398 |
|
| 399 |
# Translate Korean prompts to English
|
| 400 |
prompt = translate_korean_prompt(prompt)
|
| 401 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
| 402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
media_items = (
|
| 404 |
load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
|
| 405 |
)
|
|
@@ -447,6 +469,7 @@ def generate_video_from_image(
|
|
| 447 |
for frame in video_np[..., ::-1]:
|
| 448 |
out.write(frame)
|
| 449 |
out.release()
|
|
|
|
| 450 |
except Exception as e:
|
| 451 |
raise gr.Error(
|
| 452 |
f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
|
|
@@ -456,6 +479,12 @@ def generate_video_from_image(
|
|
| 456 |
finally:
|
| 457 |
torch.cuda.empty_cache()
|
| 458 |
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
|
| 460 |
return output_path
|
| 461 |
|
|
@@ -813,7 +842,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
|
|
| 813 |
lines=5,
|
| 814 |
)
|
| 815 |
txt2vid_enhance_toggle = Toggle(
|
| 816 |
-
label="프롬프트
|
| 817 |
value=False,
|
| 818 |
interactive=True,
|
| 819 |
)
|
|
@@ -1025,35 +1054,46 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
|
|
| 1025 |
outputs=txt2vid_prompt
|
| 1026 |
)
|
| 1027 |
|
|
|
|
| 1028 |
txt2vid_generate.click(
|
| 1029 |
fn=generate_video_from_text,
|
| 1030 |
inputs=[
|
| 1031 |
-
txt2vid_prompt,
|
| 1032 |
-
txt2vid_enhance_toggle,
|
| 1033 |
-
txt2vid_negative_prompt,
|
| 1034 |
-
txt2vid_frame_rate,
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
|
|
|
|
|
|
| 1039 |
],
|
| 1040 |
outputs=txt2vid_output,
|
| 1041 |
-
|
| 1042 |
-
concurrency_id="generate_video",
|
| 1043 |
-
queue=True,
|
| 1044 |
)
|
| 1045 |
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
fn=
|
| 1049 |
-
inputs=[
|
| 1050 |
-
|
| 1051 |
-
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1056 |
)
|
|
|
|
|
|
|
| 1057 |
|
| 1058 |
img2vid_enhance_toggle.change(
|
| 1059 |
fn=update_prompt_i2v,
|
|
|
|
| 290 |
]
|
| 291 |
|
| 292 |
def generate_video_from_text(
|
| 293 |
+
prompt,
|
| 294 |
+
enhance_prompt_toggle,
|
| 295 |
+
negative_prompt,
|
| 296 |
+
frame_rate,
|
| 297 |
+
seed,
|
| 298 |
+
num_inference_steps,
|
| 299 |
+
guidance_scale,
|
| 300 |
+
height,
|
| 301 |
+
width,
|
| 302 |
+
num_frames,
|
| 303 |
progress=gr.Progress(),
|
| 304 |
):
|
| 305 |
if len(prompt.strip()) < 50:
|
|
|
|
| 308 |
duration=5,
|
| 309 |
)
|
| 310 |
|
| 311 |
+
# 프롬프트 개선이 활성화된 경우
|
| 312 |
+
if enhance_prompt_toggle:
|
| 313 |
+
prompt = enhance_prompt(prompt, "t2v")
|
| 314 |
+
|
| 315 |
# Translate Korean prompts to English
|
| 316 |
prompt = translate_korean_prompt(prompt)
|
| 317 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
| 318 |
|
| 319 |
+
# 기본값 설정
|
| 320 |
+
height = height or 320
|
| 321 |
+
width = width or 512
|
| 322 |
+
num_frames = num_frames or 257
|
| 323 |
+
frame_rate = frame_rate or 25
|
| 324 |
+
seed = seed or 171198
|
| 325 |
+
num_inference_steps = num_inference_steps or 41
|
| 326 |
+
guidance_scale = guidance_scale or 4.0
|
| 327 |
+
|
| 328 |
sample = {
|
| 329 |
"prompt": prompt,
|
| 330 |
"prompt_attention_mask": None,
|
|
|
|
| 367 |
gc.collect()
|
| 368 |
|
| 369 |
output_path = tempfile.mktemp(suffix=".mp4")
|
|
|
|
| 370 |
video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
|
| 371 |
video_np = (video_np * 255).astype(np.uint8)
|
| 372 |
height, width = video_np.shape[1:3]
|
|
|
|
| 383 |
|
| 384 |
def generate_video_from_image(
|
| 385 |
image_path,
|
| 386 |
+
prompt,
|
| 387 |
+
enhance_prompt_toggle,
|
| 388 |
+
negative_prompt,
|
| 389 |
+
frame_rate,
|
| 390 |
+
seed,
|
| 391 |
+
num_inference_steps,
|
| 392 |
+
guidance_scale,
|
| 393 |
+
height,
|
| 394 |
+
width,
|
| 395 |
+
num_frames,
|
| 396 |
progress=gr.Progress(),
|
| 397 |
):
|
| 398 |
+
if not image_path:
|
| 399 |
+
raise gr.Error("입력 이미지를 제공해주세요.", duration=5)
|
|
|
|
| 400 |
|
| 401 |
if len(prompt.strip()) < 50:
|
| 402 |
raise gr.Error(
|
|
|
|
| 404 |
duration=5,
|
| 405 |
)
|
| 406 |
|
| 407 |
+
# 프롬프트 개선이 활성화된 경우
|
| 408 |
+
if enhance_prompt_toggle:
|
| 409 |
+
prompt = enhance_prompt(prompt, "i2v")
|
| 410 |
|
| 411 |
# Translate Korean prompts to English
|
| 412 |
prompt = translate_korean_prompt(prompt)
|
| 413 |
negative_prompt = translate_korean_prompt(negative_prompt)
|
| 414 |
|
| 415 |
+
# 기본값 설정
|
| 416 |
+
height = height or 320
|
| 417 |
+
width = width or 512
|
| 418 |
+
num_frames = num_frames or 257
|
| 419 |
+
frame_rate = frame_rate or 25
|
| 420 |
+
seed = seed or 171198
|
| 421 |
+
num_inference_steps = num_inference_steps or 41
|
| 422 |
+
guidance_scale = guidance_scale or 4.0
|
| 423 |
+
|
| 424 |
+
# 이미지 로드 및 전처리
|
| 425 |
media_items = (
|
| 426 |
load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
|
| 427 |
)
|
|
|
|
| 469 |
for frame in video_np[..., ::-1]:
|
| 470 |
out.write(frame)
|
| 471 |
out.release()
|
| 472 |
+
|
| 473 |
except Exception as e:
|
| 474 |
raise gr.Error(
|
| 475 |
f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
|
|
|
|
| 479 |
finally:
|
| 480 |
torch.cuda.empty_cache()
|
| 481 |
gc.collect()
|
| 482 |
+
if 'images' in locals():
|
| 483 |
+
del images
|
| 484 |
+
if 'video_np' in locals():
|
| 485 |
+
del video_np
|
| 486 |
+
if 'media_items' in locals():
|
| 487 |
+
del media_items
|
| 488 |
|
| 489 |
return output_path
|
| 490 |
|
|
|
|
| 842 |
lines=5,
|
| 843 |
)
|
| 844 |
txt2vid_enhance_toggle = Toggle(
|
| 845 |
+
label="프롬프트 증강",
|
| 846 |
value=False,
|
| 847 |
interactive=True,
|
| 848 |
)
|
|
|
|
| 1054 |
outputs=txt2vid_prompt
|
| 1055 |
)
|
| 1056 |
|
| 1057 |
+
# Event handlers 부분 수정
|
| 1058 |
txt2vid_generate.click(
|
| 1059 |
fn=generate_video_from_text,
|
| 1060 |
inputs=[
|
| 1061 |
+
txt2vid_prompt, # 텍스트 입력
|
| 1062 |
+
txt2vid_enhance_toggle, # 프롬프트 개선 토글
|
| 1063 |
+
txt2vid_negative_prompt, # 네거티브 프롬프트
|
| 1064 |
+
txt2vid_frame_rate, # 프레임 레이트
|
| 1065 |
+
txt2vid_advanced[0], # seed
|
| 1066 |
+
txt2vid_advanced[1], # inference_steps
|
| 1067 |
+
txt2vid_advanced[2], # guidance_scale
|
| 1068 |
+
height_slider, # height
|
| 1069 |
+
width_slider, # width
|
| 1070 |
+
num_frames_slider, # num_frames
|
| 1071 |
],
|
| 1072 |
outputs=txt2vid_output,
|
| 1073 |
+
api_name="generate_text_to_video"
|
|
|
|
|
|
|
| 1074 |
)
|
| 1075 |
|
| 1076 |
+
# Image to Video 이벤트 핸들러 수정
|
| 1077 |
+
img2vid_generate.click(
|
| 1078 |
+
fn=generate_video_from_image,
|
| 1079 |
+
inputs=[
|
| 1080 |
+
img2vid_image, # 입력 이미지
|
| 1081 |
+
img2vid_prompt, # 텍스트 입력
|
| 1082 |
+
img2vid_enhance_toggle, # 프롬프트 개선 토글
|
| 1083 |
+
img2vid_negative_prompt, # 네거티브 프롬프트
|
| 1084 |
+
img2vid_frame_rate, # 프레임 레이트
|
| 1085 |
+
img2vid_advanced[0], # seed
|
| 1086 |
+
img2vid_advanced[1], # inference_steps
|
| 1087 |
+
img2vid_advanced[2], # guidance_scale
|
| 1088 |
+
height_slider, # height
|
| 1089 |
+
width_slider, # width
|
| 1090 |
+
num_frames_slider, # num_frames
|
| 1091 |
+
],
|
| 1092 |
+
outputs=img2vid_output,
|
| 1093 |
+
api_name="generate_image_to_video"
|
| 1094 |
)
|
| 1095 |
+
|
| 1096 |
+
|
| 1097 |
|
| 1098 |
img2vid_enhance_toggle.change(
|
| 1099 |
fn=update_prompt_i2v,
|