Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -50,40 +50,6 @@ system_prompt_i2v = """λΉμ μ μ΄λ―Έμ§ κΈ°λ° λΉλμ€ μμ±μ μν ν
|
|
50 |
촬μ κ°λ
μ΄ μ΄¬μ λͺ©λ‘μ μ€λͺ
νλ κ²μ²λΌ ꡬ체μ μ΄κ³ μκ°μ μΌλ‘ μμ±νμΈμ.
|
51 |
200λ¨μ΄λ₯Ό λμ§ μλλ‘ νλ, μ΅λν μμΈνκ² μμ±νμΈμ."""
|
52 |
|
53 |
-
# Default preset
|
54 |
-
default_preset = "16:9 (512x320)"
|
55 |
-
|
56 |
-
# preset_options μμ - κ° ν΄μλλ³ μ νν νλ μ μ μ μ©
|
57 |
-
preset_options = [
|
58 |
-
# 16:9 λΉμ¨ (μ΅λ/μ΅μ)
|
59 |
-
{"label": "16:9 HD (1216x684)", "width": 1216, "height": 684, "num_frames": 41, "aspect": "16:9"}, # 1.6μ΄
|
60 |
-
{"label": "16:9 (512x320)", "width": 512, "height": 320, "num_frames": 257, "aspect": "16:9"}, # 10.3μ΄
|
61 |
-
|
62 |
-
# 4:3 λΉμ¨ (μ΅λ/μ΅μ)
|
63 |
-
{"label": "4:3 (1024x768)", "width": 1024, "height": 768, "num_frames": 49, "aspect": "4:3"}, # 2.0μ΄
|
64 |
-
{"label": "4:3 (640x480)", "width": 640, "height": 480, "num_frames": 121, "aspect": "4:3"}, # 4.8μ΄
|
65 |
-
|
66 |
-
# 1:1 λΉμ¨ (μ΅λ/μ΅μ)
|
67 |
-
{"label": "1:1 (896x896)", "width": 896, "height": 896, "num_frames": 73, "aspect": "1:1"}, # 2.9μ΄
|
68 |
-
{"label": "1:1 (512x512)", "width": 512, "height": 512, "num_frames": 233, "aspect": "1:1"}, # 9.3μ΄
|
69 |
-
|
70 |
-
# 3:2 λΉμ¨ (μ΅λ)
|
71 |
-
{"label": "3:2 (1200x800)", "width": 1200, "height": 800, "num_frames": 41, "aspect": "3:2"}, # 1.6μ΄
|
72 |
-
|
73 |
-
# 9:16 λΉμ¨ (μ΅μ)
|
74 |
-
{"label": "9:16 (432x768)", "width": 432, "height": 768, "num_frames": 241, "aspect": "9:16"} # 9.6μ΄
|
75 |
-
]
|
76 |
-
|
77 |
-
# State λ³μλ€μ μ΄κΈ°κ°λ μμ (512x320 κΈ°μ€)
|
78 |
-
txt2vid_current_height = gr.State(value=320)
|
79 |
-
txt2vid_current_width = gr.State(value=512)
|
80 |
-
txt2vid_current_num_frames = gr.State(value=257) # 10.3μ΄
|
81 |
-
|
82 |
-
img2vid_current_height = gr.State(value=320)
|
83 |
-
img2vid_current_width = gr.State(value=512)
|
84 |
-
img2vid_current_num_frames = gr.State(value=257) # 10.3μ΄
|
85 |
-
|
86 |
-
|
87 |
# Load Hugging Face token if needed
|
88 |
hf_token = os.getenv("HF_TOKEN")
|
89 |
openai_api_key = os.getenv("OPENAI_API_KEY")
|
@@ -229,17 +195,68 @@ pipeline = XoraVideoPipeline(
|
|
229 |
vae=vae,
|
230 |
).to(device)
|
231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
def preset_changed(preset):
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
|
244 |
def generate_video_from_text(
|
245 |
prompt="",
|
@@ -249,12 +266,11 @@ def generate_video_from_text(
|
|
249 |
seed=171198,
|
250 |
num_inference_steps=41,
|
251 |
guidance_scale=4,
|
252 |
-
height=
|
253 |
-
width=
|
254 |
-
num_frames=257,
|
255 |
progress=gr.Progress(),
|
256 |
):
|
257 |
-
|
258 |
if len(prompt.strip()) < 50:
|
259 |
raise gr.Error(
|
260 |
"ν둬ννΈλ μ΅μ 50μ μ΄μμ΄μ΄μΌ ν©λλ€. λ μμΈν μ€λͺ
μ μ 곡ν΄μ£ΌμΈμ.",
|
@@ -295,7 +311,8 @@ def generate_video_from_text(
|
|
295 |
vae_per_channel_normalize=True,
|
296 |
conditioning_method=ConditioningMethod.UNCONDITIONAL,
|
297 |
mixed_precision=True,
|
298 |
-
callback_on_step_end=gradio_progress_callback,
|
|
|
299 |
except Exception as e:
|
300 |
raise gr.Error(
|
301 |
f"λΉλμ€ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ. μ€λ₯: {e}",
|
@@ -330,12 +347,11 @@ def generate_video_from_image(
|
|
330 |
seed=171198,
|
331 |
num_inference_steps=50,
|
332 |
guidance_scale=4,
|
333 |
-
height=
|
334 |
-
width=
|
335 |
-
num_frames=
|
336 |
progress=gr.Progress(),
|
337 |
):
|
338 |
-
|
339 |
print("Height: ", height)
|
340 |
print("Width: ", width)
|
341 |
print("Num Frames: ", num_frames)
|
@@ -439,26 +455,26 @@ def create_advanced_options():
|
|
439 |
)
|
440 |
height_slider = gr.Slider(
|
441 |
label="4.4 Height",
|
442 |
-
minimum=
|
443 |
-
maximum=
|
444 |
step=64,
|
445 |
-
value=
|
446 |
visible=False,
|
447 |
)
|
448 |
width_slider = gr.Slider(
|
449 |
label="4.5 Width",
|
450 |
-
minimum=
|
451 |
-
maximum=
|
452 |
step=64,
|
453 |
-
value=
|
454 |
visible=False,
|
455 |
)
|
456 |
num_frames_slider = gr.Slider(
|
457 |
label="4.5 Number of Frames",
|
458 |
-
minimum=
|
459 |
-
maximum=
|
460 |
step=1,
|
461 |
-
value=
|
462 |
visible=False,
|
463 |
)
|
464 |
|
@@ -471,7 +487,6 @@ def create_advanced_options():
|
|
471 |
num_frames_slider,
|
472 |
]
|
473 |
|
474 |
-
|
475 |
# Gradio Interface Definition
|
476 |
with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
477 |
with gr.Tabs():
|
@@ -486,7 +501,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
486 |
lines=5,
|
487 |
)
|
488 |
txt2vid_enhance_toggle = Toggle(
|
489 |
-
label="ν둬ννΈ
|
490 |
value=False,
|
491 |
interactive=True,
|
492 |
)
|
@@ -500,13 +515,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
500 |
)
|
501 |
|
502 |
# νμ¬ μ νλ κ°λ€μ μ μ₯ν μν λ³μλ€
|
503 |
-
txt2vid_current_height = gr.State(value=
|
504 |
-
txt2vid_current_width = gr.State(value=
|
505 |
-
txt2vid_current_num_frames = gr.State(value=
|
506 |
|
507 |
txt2vid_preset = gr.Dropdown(
|
508 |
choices=[p["label"] for p in preset_options],
|
509 |
-
value=
|
510 |
label="Step 2: ν΄μλ ν리μ
μ ν",
|
511 |
)
|
512 |
|
@@ -558,13 +573,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
558 |
)
|
559 |
|
560 |
# νμ¬ μ νλ κ°λ€μ μ μ₯ν μν λ³μλ€
|
561 |
-
img2vid_current_height = gr.State(value=
|
562 |
-
img2vid_current_width = gr.State(value=
|
563 |
-
img2vid_current_num_frames = gr.State(value=
|
564 |
|
565 |
img2vid_preset = gr.Dropdown(
|
566 |
choices=[p["label"] for p in preset_options],
|
567 |
-
value=
|
568 |
label="Step 3: ν΄μλ ν리μ
μ ν",
|
569 |
)
|
570 |
|
@@ -662,4 +677,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
662 |
if __name__ == "__main__":
|
663 |
iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
|
664 |
share=True, show_api=False
|
665 |
-
)
|
|
|
50 |
촬μ κ°λ
μ΄ μ΄¬μ λͺ©λ‘μ μ€λͺ
νλ κ²μ²λΌ ꡬ체μ μ΄κ³ μκ°μ μΌλ‘ μμ±νμΈμ.
|
51 |
200λ¨μ΄λ₯Ό λμ§ μλλ‘ νλ, μ΅λν μμΈνκ² μμ±νμΈμ."""
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
# Load Hugging Face token if needed
|
54 |
hf_token = os.getenv("HF_TOKEN")
|
55 |
openai_api_key = os.getenv("OPENAI_API_KEY")
|
|
|
195 |
vae=vae,
|
196 |
).to(device)
|
197 |
|
198 |
+
|
199 |
+
# Adjusted current-state values for txt2vid and img2vid
|
200 |
+
txt2vid_current_height = gr.State(value=320) # μμ λ¨
|
201 |
+
txt2vid_current_width = gr.State(value=512) # μμ λ¨
|
202 |
+
txt2vid_current_num_frames = gr.State(value=257) # 10.3μ΄
|
203 |
+
|
204 |
+
img2vid_current_height = gr.State(value=320) # μμ λ¨
|
205 |
+
img2vid_current_width = gr.State(value=512) # μμ λ¨
|
206 |
+
img2vid_current_num_frames = gr.State(value=257) # 10.3μ΄
|
207 |
+
|
208 |
+
# Preset options for resolution and frame configuration
|
209 |
+
# Convert frames to seconds assuming 25 FPS
|
210 |
+
preset_options = [
|
211 |
+
{"label": "[16:9 HD] 1216x704, 1.6μ΄", "width": 1216, "height": 704, "num_frames": 41},
|
212 |
+
{"label": "[16:9] 1088x704, 2.0μ΄", "width": 1088, "height": 704, "num_frames": 49},
|
213 |
+
{"label": "[16:9] 1056x640, 2.3μ΄", "width": 1056, "height": 640, "num_frames": 57},
|
214 |
+
{"label": "[16:9] 992x608, 2.6μ΄", "width": 992, "height": 608, "num_frames": 65},
|
215 |
+
{"label": "[16:9] 896x608, 2.9μ΄", "width": 896, "height": 608, "num_frames": 73},
|
216 |
+
{"label": "[16:9] 896x544, 3.2μ΄", "width": 896, "height": 544, "num_frames": 81},
|
217 |
+
{"label": "[16:9] 832x544, 3.6μ΄", "width": 832, "height": 544, "num_frames": 89},
|
218 |
+
{"label": "[16:9] 800x512, 3.9μ΄", "width": 800, "height": 512, "num_frames": 97},
|
219 |
+
{"label": "[16:9] 768x512, 3.9μ΄", "width": 768, "height": 512, "num_frames": 97},
|
220 |
+
{"label": "[16:9] 800x480, 4.2μ΄", "width": 800, "height": 480, "num_frames": 105},
|
221 |
+
{"label": "[16:9] 736x480, 4.5μ΄", "width": 736, "height": 480, "num_frames": 113},
|
222 |
+
{"label": "[3:2] 704x480, 4.8μ΄", "width": 704, "height": 480, "num_frames": 121},
|
223 |
+
{"label": "[16:9] 704x448, 5.2μ΄", "width": 704, "height": 448, "num_frames": 129},
|
224 |
+
{"label": "[16:9] 672x448, 5.5μ΄", "width": 672, "height": 448, "num_frames": 137},
|
225 |
+
{"label": "[16:9] 640x416, 6.1μ΄", "width": 640, "height": 416, "num_frames": 153},
|
226 |
+
{"label": "[16:9] 672x384, 6.4μ΄", "width": 672, "height": 384, "num_frames": 161},
|
227 |
+
{"label": "[16:9] 640x384, 6.8μ΄", "width": 640, "height": 384, "num_frames": 169},
|
228 |
+
{"label": "[16:9] 608x384, 7.1μ΄", "width": 608, "height": 384, "num_frames": 177},
|
229 |
+
{"label": "[16:9] 576x384, 7.4μ΄", "width": 576, "height": 384, "num_frames": 185},
|
230 |
+
{"label": "[16:9] 608x352, 7.7μ΄", "width": 608, "height": 352, "num_frames": 193},
|
231 |
+
{"label": "[16:9] 576x352, 8.0μ΄", "width": 576, "height": 352, "num_frames": 201},
|
232 |
+
{"label": "[16:9] 544x352, 8.4μ΄", "width": 544, "height": 352, "num_frames": 209},
|
233 |
+
{"label": "[3:2] 512x352, 9.3μ΄", "width": 512, "height": 352, "num_frames": 233},
|
234 |
+
{"label": "[16:9] 544x320, 9.6μ΄", "width": 544, "height": 320, "num_frames": 241},
|
235 |
+
{"label": "[16:9] 512x320, 10.3μ΄", "width": 512, "height": 320, "num_frames": 257},
|
236 |
+
]
|
237 |
+
|
238 |
def preset_changed(preset):
    """Map a resolution-preset selection to values and visibility updates.

    Args:
        preset: Label selected in the preset dropdown, or the literal
            string "Custom".

    Returns:
        A 6-tuple ``(height, width, num_frames, upd_a, upd_b, upd_c)``.

        * Known preset: the first three items are that preset's
          ``height``, ``width`` and ``num_frames``; the three
          ``gr.update`` objects carry ``visible=False``.
        * "Custom", or a label that is not present in ``preset_options``:
          the first three items are ``None`` and the three ``gr.update``
          objects carry ``visible=True``.

        NOTE(review): the components that receive the three updates are
        wired up outside this function -- presumably the manual
        height/width/num_frames sliders; confirm against the ``.change``
        hookup.
    """
    selected = None
    if preset != "Custom":
        # Supply a default so an unknown label (e.g. a stale dropdown value)
        # does not surface as a bare StopIteration inside the event handler.
        selected = next(
            (item for item in preset_options if item["label"] == preset),
            None,
        )

    if selected is None:
        # Custom / unrecognised preset: no preset values to report; ask for
        # the three target components to be shown.
        return (
            None,
            None,
            None,
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
        )

    # The preset's values are handed back to the caller through the return
    # tuple (nothing global is mutated here).
    return (
        selected["height"],
        selected["width"],
        selected["num_frames"],
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
|
259 |
+
|
260 |
|
261 |
def generate_video_from_text(
|
262 |
prompt="",
|
|
|
266 |
seed=171198,
|
267 |
num_inference_steps=41,
|
268 |
guidance_scale=4,
|
269 |
+
height=512,
|
270 |
+
width=320,
|
271 |
+
num_frames=257,
|
272 |
progress=gr.Progress(),
|
273 |
):
|
|
|
274 |
if len(prompt.strip()) < 50:
|
275 |
raise gr.Error(
|
276 |
"ν둬ννΈλ μ΅μ 50μ μ΄μμ΄μ΄μΌ ν©λλ€. λ μμΈν μ€λͺ
μ μ 곡ν΄μ£ΌμΈμ.",
|
|
|
311 |
vae_per_channel_normalize=True,
|
312 |
conditioning_method=ConditioningMethod.UNCONDITIONAL,
|
313 |
mixed_precision=True,
|
314 |
+
callback_on_step_end=gradio_progress_callback,
|
315 |
+
).images
|
316 |
except Exception as e:
|
317 |
raise gr.Error(
|
318 |
f"λΉλμ€ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ. μ€λ₯: {e}",
|
|
|
347 |
seed=171198,
|
348 |
num_inference_steps=50,
|
349 |
guidance_scale=4,
|
350 |
+
height=512,
|
351 |
+
width=768,
|
352 |
+
num_frames=121,
|
353 |
progress=gr.Progress(),
|
354 |
):
|
|
|
355 |
print("Height: ", height)
|
356 |
print("Width: ", width)
|
357 |
print("Num Frames: ", num_frames)
|
|
|
455 |
)
|
456 |
height_slider = gr.Slider(
|
457 |
label="4.4 Height",
|
458 |
+
minimum=256,
|
459 |
+
maximum=1024,
|
460 |
step=64,
|
461 |
+
value=512,
|
462 |
visible=False,
|
463 |
)
|
464 |
width_slider = gr.Slider(
|
465 |
label="4.5 Width",
|
466 |
+
minimum=256,
|
467 |
+
maximum=1024,
|
468 |
step=64,
|
469 |
+
value=768,
|
470 |
visible=False,
|
471 |
)
|
472 |
num_frames_slider = gr.Slider(
|
473 |
label="4.5 Number of Frames",
|
474 |
+
minimum=1,
|
475 |
+
maximum=200,
|
476 |
step=1,
|
477 |
+
value=121,
|
478 |
visible=False,
|
479 |
)
|
480 |
|
|
|
487 |
num_frames_slider,
|
488 |
]
|
489 |
|
|
|
490 |
# Gradio Interface Definition
|
491 |
with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
492 |
with gr.Tabs():
|
|
|
501 |
lines=5,
|
502 |
)
|
503 |
txt2vid_enhance_toggle = Toggle(
|
504 |
+
label="ν둬ννΈ κ°μ ",
|
505 |
value=False,
|
506 |
interactive=True,
|
507 |
)
|
|
|
515 |
)
|
516 |
|
517 |
# State variables that store the currently selected values
|
518 |
+
txt2vid_current_height = gr.State(value=512)
|
519 |
+
txt2vid_current_width = gr.State(value=320)
|
520 |
+
txt2vid_current_num_frames = gr.State(value=257)
|
521 |
|
522 |
txt2vid_preset = gr.Dropdown(
|
523 |
choices=[p["label"] for p in preset_options],
|
524 |
+
value="512x320, 10.3μ΄",
|
525 |
label="Step 2: ν΄μλ ν리μ
μ ν",
|
526 |
)
|
527 |
|
|
|
573 |
)
|
574 |
|
575 |
# State variables that store the currently selected values
|
576 |
+
img2vid_current_height = gr.State(value=512)
|
577 |
+
img2vid_current_width = gr.State(value=768)
|
578 |
+
img2vid_current_num_frames = gr.State(value=97)
|
579 |
|
580 |
img2vid_preset = gr.Dropdown(
|
581 |
choices=[p["label"] for p in preset_options],
|
582 |
+
value="512x320, 10.3μ΄",
|
583 |
label="Step 3: ν΄μλ ν리μ
μ ν",
|
584 |
)
|
585 |
|
|
|
677 |
if __name__ == "__main__":
|
678 |
iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(
|
679 |
share=True, show_api=False
|
680 |
+
)
|