openfree committed
Commit b25c837 · verified · Parent: ba73797

Update app.py

Files changed (1): app.py (+88, −48)
app.py CHANGED
@@ -290,16 +290,16 @@ def preset_changed(preset):
     ]
 
 def generate_video_from_text(
-    prompt="",
-    enhance_prompt_toggle=False,
-    negative_prompt="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
-    frame_rate=25,
-    seed=171198,
-    num_inference_steps=41,
-    guidance_scale=4,
-    height=320,
-    width=512,
-    num_frames=257,
+    prompt,
+    enhance_prompt_toggle,
+    negative_prompt,
+    frame_rate,
+    seed,
+    num_inference_steps,
+    guidance_scale,
+    height,
+    width,
+    num_frames,
     progress=gr.Progress(),
 ):
     if len(prompt.strip()) < 50:
@@ -308,10 +308,23 @@ def generate_video_from_text(
         duration=5,
     )
 
+    # If prompt enhancement is enabled
+    if enhance_prompt_toggle:
+        prompt = enhance_prompt(prompt, "t2v")
+
     # Translate Korean prompts to English
     prompt = translate_korean_prompt(prompt)
     negative_prompt = translate_korean_prompt(negative_prompt)
 
+    # Set default values
+    height = height or 320
+    width = width or 512
+    num_frames = num_frames or 257
+    frame_rate = frame_rate or 25
+    seed = seed or 171198
+    num_inference_steps = num_inference_steps or 41
+    guidance_scale = guidance_scale or 4.0
+
     sample = {
         "prompt": prompt,
         "prompt_attention_mask": None,
@@ -354,7 +367,6 @@ def generate_video_from_text(
     gc.collect()
 
     output_path = tempfile.mktemp(suffix=".mp4")
-    print(images.shape)
     video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
     video_np = (video_np * 255).astype(np.uint8)
     height, width = video_np.shape[1:3]
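For context on the retained `permute` line: `images` appears to come out of the pipeline as `(batch, channels, frames, height, width)`, so `squeeze(0).permute(1, 2, 3, 0)` yields a frame-major `(frames, height, width, channels)` array that a frame-by-frame video writer can consume. A quick sketch with dummy data (the layout is inferred from the surrounding code, not stated in the commit):

    import torch

    images = torch.rand(1, 3, 257, 320, 512)  # assumed (B, C, F, H, W) layout
    video_np = images.squeeze(0).permute(1, 2, 3, 0).cpu().float().numpy()
    print(video_np.shape)  # (257, 320, 512, 3): one H x W x C frame per index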
@@ -371,21 +383,20 @@ def generate_video_from_text(
 
 def generate_video_from_image(
     image_path,
-    prompt="",
-    enhance_prompt_toggle=False,
-    negative_prompt="low quality, worst quality, deformed, distorted, warped, motion smear, motion artifacts, fused fingers, incorrect anatomy, strange hands, unattractive",
-    frame_rate=25,
-    seed=171198,
-    num_inference_steps=41,
-    guidance_scale=4,
-    height=320,
-    width=512,
-    num_frames=257,
+    prompt,
+    enhance_prompt_toggle,
+    negative_prompt,
+    frame_rate,
+    seed,
+    num_inference_steps,
+    guidance_scale,
+    height,
+    width,
+    num_frames,
     progress=gr.Progress(),
 ):
-    print("Height: ", height)
-    print("Width: ", width)
-    print("Num Frames: ", num_frames)
+    if not image_path:
+        raise gr.Error("입력 이미지를 제공해주세요.", duration=5)
 
     if len(prompt.strip()) < 50:
         raise gr.Error(
@@ -393,13 +404,24 @@ def generate_video_from_image(
         duration=5,
     )
 
-    if not image_path:
-        raise gr.Error("입력 이미지를 제공해주세요.", duration=5)
+    # If prompt enhancement is enabled
+    if enhance_prompt_toggle:
+        prompt = enhance_prompt(prompt, "i2v")
 
     # Translate Korean prompts to English
     prompt = translate_korean_prompt(prompt)
     negative_prompt = translate_korean_prompt(negative_prompt)
 
+    # Set default values
+    height = height or 320
+    width = width or 512
+    num_frames = num_frames or 257
+    frame_rate = frame_rate or 25
+    seed = seed or 171198
+    num_inference_steps = num_inference_steps or 41
+    guidance_scale = guidance_scale or 4.0
+
+    # Load and preprocess the image
     media_items = (
         load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
    )
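`load_image_to_tensor_with_resize` is defined elsewhere in app.py and is not shown in this diff. A hypothetical sketch of what such a helper typically does, for orientation only (the real resize strategy, value range, and tensor layout may differ):

    import numpy as np
    import torch
    from PIL import Image

    def load_image_to_tensor_with_resize(image_path, height=320, width=512):
        # Hypothetical re-implementation for illustration.
        img = Image.open(image_path).convert("RGB").resize((width, height))
        arr = np.asarray(img).astype(np.float32) / 127.5 - 1.0  # assume [-1, 1]
        return torch.from_numpy(arr).permute(2, 0, 1).unsqueeze(0)  # (1, C, H, W)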
@@ -447,6 +469,7 @@ def generate_video_from_image(
         for frame in video_np[..., ::-1]:
             out.write(frame)
         out.release()
+
     except Exception as e:
         raise gr.Error(
             f"비디오 생성 중 오류가 발생했습니다. 다시 시도해주세요. 오류: {e}",
@@ -456,6 +479,12 @@ def generate_video_from_image(
     finally:
         torch.cuda.empty_cache()
         gc.collect()
+        if 'images' in locals():
+            del images
+        if 'video_np' in locals():
+            del video_np
+        if 'media_items' in locals():
+            del media_items
 
     return output_path
 
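On the new cleanup block: the `del` statements run after `torch.cuda.empty_cache()` and `gc.collect()`, so the memory they release is only reclaimed on a later collection. If the intent is to return GPU memory before the function exits, dropping references first is the conventional order; a sketch of that variant (illustrative, not the committed code):

    import gc
    import torch

    def free_cuda_memory():
        # Collect unreachable tensors first, then hand cached blocks back.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # In the generation functions this would read:
    #     finally:
    #         if 'images' in locals():
    #             del images          # drop references first ...
    #         free_cuda_memory()      # ... then collect and release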
@@ -813,7 +842,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
         lines=5,
     )
     txt2vid_enhance_toggle = Toggle(
-        label="프롬프트 개선",
+        label="프롬프트 증강",
         value=False,
         interactive=True,
     )
@@ -1025,35 +1054,46 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as iface:
         outputs=txt2vid_prompt
     )
 
+    # Modified event handlers
     txt2vid_generate.click(
         fn=generate_video_from_text,
         inputs=[
-            txt2vid_prompt,
-            txt2vid_enhance_toggle,
-            txt2vid_negative_prompt,
-            txt2vid_frame_rate,
-            *txt2vid_advanced[:3],
-            txt2vid_current_height,
-            txt2vid_current_width,
-            txt2vid_current_num_frames,
+            txt2vid_prompt,           # text input
+            txt2vid_enhance_toggle,   # prompt enhancement toggle
+            txt2vid_negative_prompt,  # negative prompt
+            txt2vid_frame_rate,       # frame rate
+            txt2vid_advanced[0],      # seed
+            txt2vid_advanced[1],      # inference_steps
+            txt2vid_advanced[2],      # guidance_scale
+            height_slider,            # height
+            width_slider,             # width
+            num_frames_slider,        # num_frames
         ],
         outputs=txt2vid_output,
-        concurrency_limit=1,
-        concurrency_id="generate_video",
-        queue=True,
+        api_name="generate_text_to_video"
     )
 
-    # Image to Video Tab handlers
-    img2vid_preset.change(
-        fn=preset_changed,
-        inputs=[img2vid_preset],
-        outputs=[
-            img2vid_current_height,
-            img2vid_current_width,
-            img2vid_current_num_frames,
-            *img2vid_advanced[3:]
-        ]
+    # Modified Image to Video event handler
+    img2vid_generate.click(
+        fn=generate_video_from_image,
+        inputs=[
+            img2vid_image,            # input image
+            img2vid_prompt,           # text input
+            img2vid_enhance_toggle,   # prompt enhancement toggle
+            img2vid_negative_prompt,  # negative prompt
+            img2vid_frame_rate,       # frame rate
+            img2vid_advanced[0],      # seed
+            img2vid_advanced[1],      # inference_steps
+            img2vid_advanced[2],      # guidance_scale
+            height_slider,            # height
+            width_slider,             # width
+            num_frames_slider,        # num_frames
+        ],
+        outputs=img2vid_output,
+        api_name="generate_image_to_video"
     )
+
+
 
     img2vid_enhance_toggle.change(
         fn=update_prompt_i2v,
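The new `api_name` arguments register both click handlers as named endpoints, so the Space can also be called programmatically. A sketch with `gradio_client`; the Space id is a placeholder, and the positional arguments must follow the `inputs` order wired above:

    from gradio_client import Client

    client = Client("openfree/SPACE-NAME")  # placeholder Space id
    result = client.predict(
        "A detailed prompt of at least fifty characters describing the scene ...",
        False,                          # enhance_prompt_toggle
        "low quality, worst quality",   # negative_prompt
        25,                             # frame_rate
        171198,                         # seed
        41,                             # num_inference_steps
        4.0,                            # guidance_scale
        320,                            # height
        512,                            # width
        257,                            # num_frames
        api_name="/generate_text_to_video",
    )
    print(result)  # local path to the generated .mp4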
 