Update app.py
app.py CHANGED
```diff
@@ -1,5 +1,6 @@
 import gradio as gr
 import torch
+import spaces
 import numpy as np
 import random
 import os
@@ -113,7 +114,10 @@ if PIPELINE_CONFIG_YAML.get("spatial_upscaler_model_path"):
     )
     print("Latent upsampler created on CPU.")
 
+pipeline_instance.to(target_inference_device)
+latent_upsampler_instance.to(target_inference_device)
 
+@spaces.GPU
 def generate(prompt, negative_prompt, input_image_filepath, input_video_filepath,
              height_ui, width_ui, mode,
              ui_steps, num_frames_ui,
@@ -196,12 +200,11 @@ def generate(prompt, negative_prompt, input_image_filepath, input_video_filepath
         raise gr.Error(f"Could not load video: {e}")
 
     print(f"Moving models to {target_inference_device} for inference...")
-
+
     active_latent_upsampler = None
     if improve_texture_flag and latent_upsampler_instance:
-        latent_upsampler_instance.to(target_inference_device)
         active_latent_upsampler = latent_upsampler_instance
-    print("Models moved.")
+    #print("Models moved.")
 
     result_images_tensor = None
     try:
@@ -238,16 +241,6 @@ def generate(prompt, negative_prompt, input_image_filepath, input_video_filepath
 
         print(f"Calling base pipeline (padded HxW: {height_padded}x{width_padded}) on {target_inference_device}")
         result_images_tensor = pipeline_instance(**single_pass_call_kwargs).images
-
-    finally:
-        print(f"Moving models back to CPU...")
-        pipeline_instance.to("cpu")
-        if active_latent_upsampler:
-            active_latent_upsampler.to("cpu")
-
-        if target_inference_device == "cuda":
-            torch.cuda.empty_cache()
-        print("Models moved back to CPU and cache cleared (if CUDA).")
 
     if result_images_tensor is None:
         raise gr.Error("Generation failed.")
@@ -316,22 +309,22 @@ css="""
 }
 """
 
-with gr.Blocks(css=css
+with gr.Blocks(css=css) as demo:
     gr.Markdown("# LTX Video 0.9.7 Distilled (using LTX-Video lib)")
     gr.Markdown("Generates a short video based on text prompt, image, or existing video. Models are moved to GPU during generation and back to CPU afterwards to save VRAM.")
     with gr.Row():
         with gr.Column():
             with gr.Group():
-                with gr.Tab("text-to-video") as text_tab:
-                    image_n_hidden = gr.Textbox(label="image_n", visible=False, value=None)
-                    video_n_hidden = gr.Textbox(label="video_n", visible=False, value=None)
-                    t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
-                    t2v_button = gr.Button("Generate Text-to-Video", variant="primary")
                 with gr.Tab("image-to-video") as image_tab:
                     video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
                     image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam"])
                     i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
                     i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
+                with gr.Tab("text-to-video") as text_tab:
+                    image_n_hidden = gr.Textbox(label="image_n", visible=False, value=None)
+                    video_n_hidden = gr.Textbox(label="video_n", visible=False, value=None)
+                    t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
+                    t2v_button = gr.Button("Generate Text-to-Video", variant="primary")
                 with gr.Tab("video-to-video") as video_tab:
                     image_v_hidden = gr.Textbox(label="image_v", visible=False, value=None)
                     video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"])
```
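
For context, this commit swaps per-call CPU↔GPU shuttling for the Spaces ZeroGPU pattern: `import spaces`, move the models to the target device once at startup, and decorate the generation entry point with `@spaces.GPU` so a GPU is attached only while that call runs and released when it returns, which is why the `finally:` block that moved models back to the CPU and called `torch.cuda.empty_cache()` could be deleted. Below is a minimal sketch of that pattern; the `Linear` layer and the `generate()` body are illustrative stand-ins for the real `pipeline_instance` / `latent_upsampler_instance` in app.py, not the app's actual code.

```python
import torch
import spaces  # pip install spaces; preinstalled on Hugging Face Spaces

device = "cuda" if torch.cuda.is_available() else "cpu"

# Moved to the device once at startup, mirroring the added
# pipeline_instance.to(target_inference_device) lines in the diff.
model = torch.nn.Linear(8, 8).to(device)

@spaces.GPU  # on ZeroGPU hardware: attach a GPU for this call, release it after
def generate(prompt: str) -> torch.Tensor:
    # No manual .to("cpu") bookkeeping or cache clearing needed here:
    # ZeroGPU scopes GPU access to the decorated call.
    x = torch.randn(1, 8, device=device)
    return model(x)
```

Outside a ZeroGPU Space the decorator is a no-op, so the same code runs unchanged on an ordinary GPU or CPU machine. The final UI hunk is unrelated to devices: it reorders the tabs so that image-to-video is defined first (Gradio selects the first-defined tab by default) and the text-to-video tab moves to second place.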