linoyts (HF Staff) committed · verified
Commit ddd3c88 · 1 Parent(s): 9f55bc7

Update app.py

Files changed (1): app.py (+65 −5)
app.py CHANGED
@@ -5,7 +5,7 @@ from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
 from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
 from diffusers.utils import export_to_video, load_video
 
-pipe = LTXConditionPipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.7-diffusers", torch_dtype=torch.bfloat16)
+pipe = LTXConditionPipeline.from_pretrained("linoyts/LTX-Video-0.9.7-distilled-diffusers", torch_dtype=torch.bfloat16)
 pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.7-Latent-Spatial-Upsampler-diffusers", vae=pipe.vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 pipe_upsample.to("cuda")
@@ -20,9 +20,68 @@ def round_to_nearest_resolution_acceptable_by_vae(height, width):
 @spaces.GPU
 def generate(prompt,
              negative_prompt,
+             image,
              steps,
-             seed):
-    return
+             num_frames,
+             seed,
+             randomize_seed):
+
+    expected_height, expected_width = 768, 1152
+    downscale_factor = 2 / 3
+
+    if image is not None:
+        condition1 = LTXVideoCondition(video=image, frame_index=0)
+    else:
+        condition1 = None
+
+    # Part 1. Generate video at smaller resolution
+    # Text-only conditioning is also supported without the need to pass `conditions`
+    downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+    downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
+
+    latents = pipe(
+        conditions=condition1,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        width=downscaled_width,
+        height=downscaled_height,
+        num_frames=num_frames,
+        num_inference_steps=steps,
+        decode_timestep=0.05,
+        decode_noise_scale=0.025,
+        generator=torch.Generator().manual_seed(seed),
+        output_type="latent",
+    ).frames
+
+    # Part 2. Upscale generated video using latent upsampler with fewer inference steps
+    # The available latent upsampler upscales the height/width by 2x
+    upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+    upscaled_latents = pipe_upsample(
+        latents=latents,
+        output_type="latent"
+    ).frames
+
+    # Part 3. Denoise the upscaled video with few steps to improve texture (optional, but recommended)
+    video = pipe(
+        conditions=condition1,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        width=upscaled_width,
+        height=upscaled_height,
+        num_frames=num_frames,
+        denoise_strength=0.4,  # Effectively, 4 inference steps out of 10
+        num_inference_steps=10,
+        latents=upscaled_latents,
+        decode_timestep=0.05,
+        image_cond_noise_scale=0.025,
+        generator=torch.Generator().manual_seed(seed),
+        output_type="pil",
+    ).frames[0]
+
+    # Part 4. Downscale the video to the expected resolution
+    video = [frame.resize((expected_width, expected_height)) for frame in video]
+    return video
+
 
 
 css="""
@@ -64,8 +123,9 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
         randomize_seed = gr.Checkbox(label="randomize seed")
         with gr.Row():
             steps = gr.Slider(label="Steps", minimum=1, maximum=30, value=8, step=1)
-            num_frames = gr.Slider(label="# frames", minimum=1, maximum=30, value=8, step=1)
-
+            num_frames = gr.Slider(label="# frames", minimum=1, maximum=200, value=161, step=1)
+
+
 
 
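To make the resolution flow of the new generate easier to follow, here is a worked sketch of the staged sizes. The numbers come from the constants in the hunk above; the VAE spatial compression ratio of 32 is an assumption about LTX-Video, not something shown in this diff.

# Worked numbers for the staged resolutions in `generate` (sketch, not app code).
expected_height, expected_width = 768, 1152
downscale_factor = 2 / 3

# Stage 1 renders at roughly two-thirds resolution:
stage1 = (int(expected_height * downscale_factor), int(expected_width * downscale_factor))
print(stage1)   # (512, 768) -- already multiples of the assumed VAE ratio of 32,
                # so round_to_nearest_resolution_acceptable_by_vae is a no-op here
# Stage 2 latent-upsamples height/width by 2x:
print((stage1[0] * 2, stage1[1] * 2))   # (1024, 1536)
# Part 4 then resizes the decoded frames back to (width, height) = (1152, 768).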
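The second hunk's header points at round_to_nearest_resolution_acceptable_by_vae, whose body falls outside the diff context. A minimal sketch of what such a helper presumably looks like, modeled on the diffusers LTX-Video documentation example; pipe.vae_spatial_compression_ratio is assumed to be available on the loaded pipeline:

# Sketch of the helper referenced in the hunk header; its actual body is not
# shown in this diff. Snaps height/width down to the nearest multiple of the
# VAE's spatial compression ratio so latents have whole-pixel dimensions.
def round_to_nearest_resolution_acceptable_by_vae(height, width):
    height = height - (height % pipe.vae_spatial_compression_ratio)
    width = width - (width % pipe.vae_spatial_compression_ratio)
    return height, width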
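One thing worth flagging: the new signature accepts randomize_seed, but none of the added lines read it; both pipeline calls seed torch.Generator directly with the incoming seed. A common Gradio pattern (hypothetical here, not part of this commit) resolves the seed before generation:

import random

# Hypothetical seed resolution -- NOT in this commit; the added code passes
# `seed` straight to torch.Generator().manual_seed() and ignores `randomize_seed`.
MAX_SEED = 2**32 - 1  # assumed bound, for illustration only

def resolve_seed(seed, randomize_seed):
    # Draw a fresh seed when the checkbox is ticked; otherwise keep the given one.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return int(seed)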
 
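The diff also omits how the widgets are wired to generate, and generate returns a list of PIL frames while a gr.Video output expects a file path. A hedged wiring sketch using the already-imported export_to_video; the component names prompt, negative_prompt, image, run_button, and output_video, as well as fps=24, are assumptions:

# Hypothetical wiring inside the gr.Blocks context -- not shown in this diff.
# `generate` returns PIL frames, so a wrapper exports them to a file path that
# a gr.Video output can display; export_to_video returns that path.
def generate_and_export(prompt, negative_prompt, image, steps, num_frames, seed, randomize_seed):
    frames = generate(prompt, negative_prompt, image, steps, num_frames, seed, randomize_seed)
    return export_to_video(frames, "output.mp4", fps=24)  # fps=24 is an assumption

run_button.click(
    fn=generate_and_export,
    inputs=[prompt, negative_prompt, image, steps, num_frames, seed, randomize_seed],
    outputs=[output_video],
)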