openfree committed on
Commit
549e657
·
verified ·
1 Parent(s): b533050

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -73
app.py CHANGED
@@ -50,40 +50,6 @@ system_prompt_i2v = """당신은 이미지 기반 λΉ„λ””μ˜€ 생성을 μœ„ν•œ ν”„
50
  촬영 감독이 촬영 λͺ©λ‘μ„ μ„€λͺ…ν•˜λŠ” κ²ƒμ²˜λŸΌ ꡬ체적이고 μ‹œκ°μ μœΌλ‘œ μž‘μ„±ν•˜μ„Έμš”.
51
  200단어λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ ν•˜λ˜, μ΅œλŒ€ν•œ μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”."""
52
 
53
- # Default preset
54
- default_preset = "16:9 (512x320)"
55
-
56
- # preset_options 수정 - 각 해상도별 정확한 프레임 수 적용
57
- preset_options = [
58
- # 16:9 비율 (최대/최소)
59
- {"label": "16:9 HD (1216x684)", "width": 1216, "height": 684, "num_frames": 41, "aspect": "16:9"}, # 1.6초
60
- {"label": "16:9 (512x320)", "width": 512, "height": 320, "num_frames": 257, "aspect": "16:9"}, # 10.3초
61
-
62
- # 4:3 비율 (최대/최소)
63
- {"label": "4:3 (1024x768)", "width": 1024, "height": 768, "num_frames": 49, "aspect": "4:3"}, # 2.0초
64
- {"label": "4:3 (640x480)", "width": 640, "height": 480, "num_frames": 121, "aspect": "4:3"}, # 4.8초
65
-
66
- # 1:1 비율 (최대/최소)
67
- {"label": "1:1 (896x896)", "width": 896, "height": 896, "num_frames": 73, "aspect": "1:1"}, # 2.9초
68
- {"label": "1:1 (512x512)", "width": 512, "height": 512, "num_frames": 233, "aspect": "1:1"}, # 9.3초
69
-
70
- # 3:2 비율 (최대)
71
- {"label": "3:2 (1200x800)", "width": 1200, "height": 800, "num_frames": 41, "aspect": "3:2"}, # 1.6초
72
-
73
- # 9:16 비율 (최소)
74
- {"label": "9:16 (432x768)", "width": 432, "height": 768, "num_frames": 241, "aspect": "9:16"} # 9.6초
75
- ]
76
-
77
- # State 변수들의 초기값도 수정 (512x320 기준)
78
- txt2vid_current_height = gr.State(value=320)
79
- txt2vid_current_width = gr.State(value=512)
80
- txt2vid_current_num_frames = gr.State(value=257) # 10.3초
81
-
82
- img2vid_current_height = gr.State(value=320)
83
- img2vid_current_width = gr.State(value=512)
84
- img2vid_current_num_frames = gr.State(value=257) # 10.3초
85
-
86
-
87
  # Load Hugging Face token if needed
88
  hf_token = os.getenv("HF_TOKEN")
89
  openai_api_key = os.getenv("OPENAI_API_KEY")
@@ -229,17 +195,68 @@ pipeline = XoraVideoPipeline(
229
  vae=vae,
230
  ).to(device)
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  def preset_changed(preset):
233
- selected = next(item for item in preset_options if item["label"] == preset)
234
- return (
235
- selected["height"],
236
- selected["width"],
237
- selected["num_frames"],
238
- gr.update(visible=False),
239
- gr.update(visible=False),
240
- gr.update(visible=False),
241
- )
242
-
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  def generate_video_from_text(
245
  prompt="",
@@ -249,12 +266,11 @@ def generate_video_from_text(
249
  seed=171198,
250
  num_inference_steps=41,
251
  guidance_scale=4,
252
- height=320, # κΈ°λ³Έκ°’ μˆ˜μ •
253
- width=512, # κΈ°λ³Έκ°’ μˆ˜μ •
254
- num_frames=257, # κΈ°λ³Έκ°’ μˆ˜μ • (10.3초)
255
  progress=gr.Progress(),
256
  ):
257
-
258
  if len(prompt.strip()) < 50:
259
  raise gr.Error(
260
  "ν”„λ‘¬ν”„νŠΈλŠ” μ΅œμ†Œ 50자 이상이어야 ν•©λ‹ˆλ‹€. 더 μžμ„Έν•œ μ„€λͺ…을 μ œκ³΅ν•΄μ£Όμ„Έμš”.",
@@ -295,7 +311,8 @@ def generate_video_from_text(
295
  vae_per_channel_normalize=True,
296
  conditioning_method=ConditioningMethod.UNCONDITIONAL,
297
  mixed_precision=True,
298
- callback_on_step_end=gradio_progress_callback,).images
 
299
  except Exception as e:
300
  raise gr.Error(
301
  f"λΉ„λ””μ˜€ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”. 였λ₯˜: {e}",
@@ -330,12 +347,11 @@ def generate_video_from_image(
330
  seed=171198,
331
  num_inference_steps=50,
332
  guidance_scale=4,
333
- height=320, # κΈ°λ³Έκ°’ μˆ˜μ •
334
- width=512, # κΈ°λ³Έκ°’ μˆ˜μ •
335
- num_frames=257, # κΈ°λ³Έκ°’ μˆ˜μ • (10.3초)
336
  progress=gr.Progress(),
337
  ):
338
-
339
  print("Height: ", height)
340
  print("Width: ", width)
341
  print("Num Frames: ", num_frames)
@@ -439,26 +455,26 @@ def create_advanced_options():
439
  )
440
  height_slider = gr.Slider(
441
  label="4.4 Height",
442
- minimum=320,
443
- maximum=896,
444
  step=64,
445
- value=320,
446
  visible=False,
447
  )
448
  width_slider = gr.Slider(
449
  label="4.5 Width",
450
- minimum=512,
451
- maximum=1216,
452
  step=64,
453
- value=512,
454
  visible=False,
455
  )
456
  num_frames_slider = gr.Slider(
457
  label="4.5 Number of Frames",
458
- minimum=41, # μ΅œμ†Œ ν”„λ ˆμž„ 수 (HD 해상도 κΈ°μ€€)
459
- maximum=257, # μ΅œλŒ€ ν”„λ ˆμž„ 수 (512x320 해상도 κΈ°μ€€)
460
  step=1,
461
- value=257, # 기본값을 μ΅œλŒ€ ν”„λ ˆμž„μœΌλ‘œ μ„€μ • (10.3초)
462
  visible=False,
463
  )
464
 
@@ -471,7 +487,6 @@ def create_advanced_options():
471
  num_frames_slider,
472
  ]
473
 
474
-
475
  # Gradio Interface Definition
476
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
477
  with gr.Tabs():
@@ -486,7 +501,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
486
  lines=5,
487
  )
488
  txt2vid_enhance_toggle = Toggle(
489
- label="ν”„λ‘¬ν”„νŠΈ 증강",
490
  value=False,
491
  interactive=True,
492
  )
@@ -500,13 +515,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
500
  )
501
 
502
  # 현재 선택된 값들을 저장할 상태 변수들
503
- txt2vid_current_height = gr.State(value=360)
504
- txt2vid_current_width = gr.State(value=640)
505
- txt2vid_current_num_frames = gr.State(value=81)
506
 
507
  txt2vid_preset = gr.Dropdown(
508
  choices=[p["label"] for p in preset_options],
509
- value=default_preset,
510
  label="Step 2: 해상도 프리셋 선택",
511
  )
512
 
@@ -558,13 +573,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
558
  )
559
 
560
  # 현재 선택된 값들을 저장할 상태 변수들
561
- img2vid_current_height = gr.State(value=360)
562
- img2vid_current_width = gr.State(value=640)
563
- img2vid_current_num_frames = gr.State(value=81)
564
 
565
  img2vid_preset = gr.Dropdown(
566
  choices=[p["label"] for p in preset_options],
567
- value=default_preset,
568
  label="Step 3: 해상도 프리셋 선택",
569
  )
570
 
@@ -662,4 +677,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
662
  if __name__ == "__main__":
663
  iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
664
  share=True, show_api=False
665
- )
 
50
  촬영 감독이 촬영 λͺ©λ‘μ„ μ„€λͺ…ν•˜λŠ” κ²ƒμ²˜λŸΌ ꡬ체적이고 μ‹œκ°μ μœΌλ‘œ μž‘μ„±ν•˜μ„Έμš”.
51
  200단어λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ ν•˜λ˜, μ΅œλŒ€ν•œ μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜μ„Έμš”."""
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Load Hugging Face token if needed
54
  hf_token = os.getenv("HF_TOKEN")
55
  openai_api_key = os.getenv("OPENAI_API_KEY")
 
195
  vae=vae,
196
  ).to(device)
197
 
198
+
199
+ # txt2vid와 img2vid의 현재 상태 값들을 수정
200
+ txt2vid_current_height = gr.State(value=320) # μˆ˜μ •λ¨
201
+ txt2vid_current_width = gr.State(value=512) # μˆ˜μ •λ¨
202
+ txt2vid_current_num_frames = gr.State(value=257) # 10.3초
203
+
204
+ img2vid_current_height = gr.State(value=320) # μˆ˜μ •λ¨
205
+ img2vid_current_width = gr.State(value=512) # μˆ˜μ •λ¨
206
+ img2vid_current_num_frames = gr.State(value=257) # 10.3초
207
+
208
+ # Preset options for resolution and frame configuration
209
+ # Convert frames to seconds assuming 25 FPS
210
+ preset_options = [
211
+ {"label": "[16:9 HD] 1216x704, 1.6초", "width": 1216, "height": 704, "num_frames": 41},
212
+ {"label": "[16:9] 1088x704, 2.0초", "width": 1088, "height": 704, "num_frames": 49},
213
+ {"label": "[16:9] 1056x640, 2.3초", "width": 1056, "height": 640, "num_frames": 57},
214
+ {"label": "[16:9] 992x608, 2.6초", "width": 992, "height": 608, "num_frames": 65},
215
+ {"label": "[16:9] 896x608, 2.9초", "width": 896, "height": 608, "num_frames": 73},
216
+ {"label": "[16:9] 896x544, 3.2초", "width": 896, "height": 544, "num_frames": 81},
217
+ {"label": "[16:9] 832x544, 3.6초", "width": 832, "height": 544, "num_frames": 89},
218
+ {"label": "[16:9] 800x512, 3.9초", "width": 800, "height": 512, "num_frames": 97},
219
+ {"label": "[16:9] 768x512, 3.9초", "width": 768, "height": 512, "num_frames": 97},
220
+ {"label": "[16:9] 800x480, 4.2초", "width": 800, "height": 480, "num_frames": 105},
221
+ {"label": "[16:9] 736x480, 4.5초", "width": 736, "height": 480, "num_frames": 113},
222
+ {"label": "[3:2] 704x480, 4.8초", "width": 704, "height": 480, "num_frames": 121},
223
+ {"label": "[16:9] 704x448, 5.2초", "width": 704, "height": 448, "num_frames": 129},
224
+ {"label": "[16:9] 672x448, 5.5초", "width": 672, "height": 448, "num_frames": 137},
225
+ {"label": "[16:9] 640x416, 6.1초", "width": 640, "height": 416, "num_frames": 153},
226
+ {"label": "[16:9] 672x384, 6.4초", "width": 672, "height": 384, "num_frames": 161},
227
+ {"label": "[16:9] 640x384, 6.8초", "width": 640, "height": 384, "num_frames": 169},
228
+ {"label": "[16:9] 608x384, 7.1초", "width": 608, "height": 384, "num_frames": 177},
229
+ {"label": "[16:9] 576x384, 7.4초", "width": 576, "height": 384, "num_frames": 185},
230
+ {"label": "[16:9] 608x352, 7.7초", "width": 608, "height": 352, "num_frames": 193},
231
+ {"label": "[16:9] 576x352, 8.0초", "width": 576, "height": 352, "num_frames": 201},
232
+ {"label": "[16:9] 544x352, 8.4초", "width": 544, "height": 352, "num_frames": 209},
233
+ {"label": "[3:2] 512x352, 9.3초", "width": 512, "height": 352, "num_frames": 233},
234
+ {"label": "[16:9] 544x320, 9.6초", "width": 544, "height": 320, "num_frames": 241},
235
+ {"label": "[16:9] 512x320, 10.3초", "width": 512, "height": 320, "num_frames": 257},
236
+ ]
237
+
def preset_changed(preset):
    """Map a preset dropdown selection to dimension values and slider visibility.

    Args:
        preset: The label selected in the preset dropdown, or the literal
            string "Custom".

    Returns:
        A 6-tuple ``(height, width, num_frames, update, update, update)``.
        For a known preset the first three items are that preset's values
        and the three updates are ``gr.update(visible=False)``. For "Custom"
        the first three items are ``None`` and the three updates are
        ``gr.update(visible=True)``.
        NOTE(review): the three updates presumably target the height, width
        and frame-count sliders - confirm against the ``.change()`` wiring.

    Raises:
        gr.Error: If ``preset`` is neither "Custom" nor the label of an
            entry in ``preset_options``.
    """
    if preset == "Custom":
        return (
            None,
            None,
            None,
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
        )

    # Supply a default so an unknown label does not escape as a bare
    # StopIteration; surface it as a readable UI error instead.
    selected = next(
        (item for item in preset_options if item["label"] == preset),
        None,
    )
    if selected is None:
        raise gr.Error(f"Unknown resolution preset: {preset!r}")

    return (
        selected["height"],
        selected["width"],
        selected["num_frames"],
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
259
+
260
 
261
  def generate_video_from_text(
262
  prompt="",
 
266
  seed=171198,
267
  num_inference_steps=41,
268
  guidance_scale=4,
269
+ height=512,
270
+ width=320,
271
+ num_frames=257,
272
  progress=gr.Progress(),
273
  ):
 
274
  if len(prompt.strip()) < 50:
275
  raise gr.Error(
276
  "ν”„λ‘¬ν”„νŠΈλŠ” μ΅œμ†Œ 50자 이상이어야 ν•©λ‹ˆλ‹€. 더 μžμ„Έν•œ μ„€λͺ…을 μ œκ³΅ν•΄μ£Όμ„Έμš”.",
 
311
  vae_per_channel_normalize=True,
312
  conditioning_method=ConditioningMethod.UNCONDITIONAL,
313
  mixed_precision=True,
314
+ callback_on_step_end=gradio_progress_callback,
315
+ ).images
316
  except Exception as e:
317
  raise gr.Error(
318
  f"λΉ„λ””μ˜€ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”. 였λ₯˜: {e}",
 
347
  seed=171198,
348
  num_inference_steps=50,
349
  guidance_scale=4,
350
+ height=512,
351
+ width=768,
352
+ num_frames=121,
353
  progress=gr.Progress(),
354
  ):
 
355
  print("Height: ", height)
356
  print("Width: ", width)
357
  print("Num Frames: ", num_frames)
 
455
  )
456
  height_slider = gr.Slider(
457
  label="4.4 Height",
458
+ minimum=256,
459
+ maximum=1024,
460
  step=64,
461
+ value=512,
462
  visible=False,
463
  )
464
  width_slider = gr.Slider(
465
  label="4.5 Width",
466
+ minimum=256,
467
+ maximum=1024,
468
  step=64,
469
+ value=768,
470
  visible=False,
471
  )
472
  num_frames_slider = gr.Slider(
473
  label="4.5 Number of Frames",
474
+ minimum=1,
475
+ maximum=200,
476
  step=1,
477
+ value=121,
478
  visible=False,
479
  )
480
 
 
487
  num_frames_slider,
488
  ]
489
 
 
490
  # Gradio Interface Definition
491
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
492
  with gr.Tabs():
 
501
  lines=5,
502
  )
503
  txt2vid_enhance_toggle = Toggle(
504
+ label="ν”„λ‘¬ν”„νŠΈ κ°œμ„ ",
505
  value=False,
506
  interactive=True,
507
  )
 
515
  )
516
 
517
  # 현재 선택된 값들을 저장할 상태 변수들
518
+ txt2vid_current_height = gr.State(value=512)
519
+ txt2vid_current_width = gr.State(value=320)
520
+ txt2vid_current_num_frames = gr.State(value=257)
521
 
522
  txt2vid_preset = gr.Dropdown(
523
  choices=[p["label"] for p in preset_options],
524
+ value="512x320, 10.3초",
525
  label="Step 2: 해상도 프리셋 선택",
526
  )
527
 
 
573
  )
574
 
575
  # 현재 선택된 값들을 저장할 상태 변수들
576
+ img2vid_current_height = gr.State(value=512)
577
+ img2vid_current_width = gr.State(value=768)
578
+ img2vid_current_num_frames = gr.State(value=97)
579
 
580
  img2vid_preset = gr.Dropdown(
581
  choices=[p["label"] for p in preset_options],
582
+ value="512x320, 10.3초",
583
  label="Step 3: 해상도 프리셋 선택",
584
  )
585
 
 
677
  if __name__ == "__main__":
678
  iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
679
  share=True, show_api=False
680
+ )