rahul7star committed (verified)
Commit 2980754 · 1 Parent(s): 8dc6bbe

Update app.py

Files changed (1): app.py (+46, -14)
app.py CHANGED
@@ -27,9 +27,6 @@ FIXED_FPS = 24
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 81
 
-MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
-MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
-
 
 pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
@@ -85,7 +82,7 @@ def get_duration(
     input_image,
     prompt,
     negative_prompt,
-    duration_seconds,
+    num_frames,
     guidance_scale,
     steps,
     seed,
@@ -99,18 +96,53 @@ def generate_video(
     input_image,
     prompt,
     negative_prompt=default_negative_prompt,
-    duration_seconds = MAX_DURATION,
-    guidance_scale = 1,
-    steps = 4,
+    num_frames = MAX_FRAMES_MODEL,
+    guidance_scale = 3.5,
+    steps = 28,
     seed = 42,
     randomize_seed = False,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """
+    Generate a video from an input image using the Wan 2.1 I2V model with CausVid LoRA.
+
+    This function takes an input image and generates a video animation based on the provided
+    prompt and parameters. It uses the Wan 2.1 14B Image-to-Video model with CausVid LoRA
+    for fast generation in 4-8 steps.
+
+    Args:
+        input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
+        prompt (str): Text prompt describing the desired animation or motion.
+        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
+            Defaults to default_negative_prompt (contains unwanted visual artifacts).
+        num_frames (int, optional): Number of frames.
+            Defaults to MAX_FRAMES_MODEL.
+        guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
+            Defaults to 3.5. Range: 0.0-20.0.
+        steps (int, optional): Number of inference steps. More steps = higher quality but slower.
+            Defaults to 28. Range: 1-40.
+        seed (int, optional): Random seed for reproducible results. Defaults to 42.
+            Range: 0 to MAX_SEED (2147483647).
+        randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
+            Defaults to False.
+        progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
+
+    Returns:
+        tuple: A tuple containing:
+            - video_path (str): Path to the generated video file (.mp4)
+            - current_seed (int): The seed used for generation (useful when randomize_seed=True)
+
+    Raises:
+        gr.Error: If input_image is None (no image uploaded).
 
+    Note:
+        - The function automatically resizes the input image to the target dimensions
+        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
+        - The function uses GPU acceleration via the @spaces.GPU decorator
+    """
     if input_image is None:
         raise gr.Error("Please upload an input image.")
 
-    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
 
@@ -134,20 +166,20 @@ def generate_video(
     return video_path, current_seed
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Wan2.2-T2V-A14B AND I2V Testing")
-    #gr.Markdown("[CausVid](https://github.com/tianweiy/CausVid) is a distilled version of Wan 2.1 to run faster in just 4-8 steps, [extracted as LoRA by Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors) and is compatible with 🧨 diffusers")
+    gr.Markdown("# Fast 4 steps Wan 2.1 I2V (14B) with CausVid LoRA")
+    gr.Markdown("[CausVid](https://github.com/tianweiy/CausVid) is a distilled version of Wan 2.1 to run faster in just 4-8 steps, [extracted as LoRA by Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors) and is compatible with 🧨 diffusers")
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image (auto-resized to target H/W)")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+            num_frames_input = gr.Slider(minimum=MIN_FRAMES_MODEL, maximum=MAX_FRAMES_MODEL, step=1, value=MAX_FRAMES_MODEL, label="Frames")
 
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=35, label="Inference Steps")
-                guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)
+                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=28, label="Inference Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale")
 
             generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
@@ -155,7 +187,7 @@ with gr.Blocks() as demo:
 
     ui_inputs = [
         input_image_component, prompt_input,
-        negative_prompt_input, duration_seconds_input,
+        negative_prompt_input, num_frames_input,
         guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
     ]
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
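For context on the main change: the old UI exposed a seconds slider and derived the frame count via the removed expression np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL), while the new UI passes the frame count straight through to the pipeline. A minimal sketch of the equivalence, using only the constants and the removed expression from app.py:

    import numpy as np

    FIXED_FPS = 24          # fixed output frame rate (from app.py)
    MIN_FRAMES_MODEL = 8    # model's frame-count bounds (from app.py)
    MAX_FRAMES_MODEL = 81

    # Old behavior: convert the seconds slider value to frames, then clamp.
    def frames_from_duration(duration_seconds: float) -> int:
        return int(np.clip(int(round(duration_seconds * FIXED_FPS)),
                           MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

    # The removed slider bounds, in seconds:
    MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)  # 0.3
    MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)  # 3.4

    # The old extremes land exactly on the new slider's integer endpoints:
    assert frames_from_duration(MIN_DURATION) == MIN_FRAMES_MODEL  # 0.3 s -> 8 frames
    assert frames_from_duration(MAX_DURATION) == MAX_FRAMES_MODEL  # 3.4 s -> 81 frames

Because num_frames_input occupies duration_seconds_input's old slot in ui_inputs, the slider value now feeds the renamed num_frames parameter of generate_video positionally, and no clamping step is needed: the slider's own range (8 to 81, step 1) already enforces the model's limits.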
 
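The same parameter rename is applied to get_duration, whose body is outside this diff. On ZeroGPU Spaces such a helper is commonly passed as @spaces.GPU(duration=get_duration) and must then mirror the decorated function's signature, which would explain why it is updated in lockstep with generate_video. Since the body is not shown, the sketch below is a hypothetical budget function, not the app's actual logic; the trailing parameters and the per-step cost are assumptions:

    import gradio as gr

    # Hypothetical sketch: only get_duration's leading parameters appear in this diff.
    def get_duration(input_image, prompt, negative_prompt, num_frames,
                     guidance_scale, steps, seed, randomize_seed,
                     progress=gr.Progress(track_tqdm=True)):
        # Assumed scaling: reserve more ZeroGPU time when more inference
        # steps (and therefore more transformer passes) are requested.
        seconds_per_step = 2.0  # assumed per-step cost, not from app.py
        return int(steps * seconds_per_step)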