rahul7star committed (verified)
Commit 2980754 · 1 Parent(s): 8dc6bbe

Update app.py

Files changed (1): app.py (+46, -14)
app.py CHANGED
@@ -27,9 +27,6 @@ FIXED_FPS = 24
 MIN_FRAMES_MODEL = 8
 MAX_FRAMES_MODEL = 81
 
-MIN_DURATION = round(MIN_FRAMES_MODEL/FIXED_FPS,1)
-MAX_DURATION = round(MAX_FRAMES_MODEL/FIXED_FPS,1)
-
 
 pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID,
     transformer=WanTransformer3DModel.from_pretrained('cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
@@ -85,7 +82,7 @@ def get_duration(
     input_image,
     prompt,
     negative_prompt,
-    duration_seconds,
+    num_frames,
     guidance_scale,
     steps,
     seed,
@@ -99,18 +96,53 @@ def generate_video(
     input_image,
     prompt,
     negative_prompt=default_negative_prompt,
-    duration_seconds = MAX_DURATION,
-    guidance_scale = 1,
-    steps = 4,
+    num_frames = MAX_FRAMES_MODEL,
+    guidance_scale = 3.5,
+    steps = 28,
     seed = 42,
     randomize_seed = False,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """
+    Generate a video from an input image using the Wan 2.1 I2V model with CausVid LoRA.
+
+    This function takes an input image and generates a video animation based on the provided
+    prompt and parameters. It uses the Wan 2.1 14B Image-to-Video model with CausVid LoRA
+    for fast generation in 4-8 steps.
+
+    Args:
+        input_image (PIL.Image): The input image to animate. Will be resized to target dimensions.
+        prompt (str): Text prompt describing the desired animation or motion.
+        negative_prompt (str, optional): Negative prompt to avoid unwanted elements.
+            Defaults to default_negative_prompt (contains unwanted visual artifacts).
+        num_frames (int, optional): Number of frames.
+            Defaults to MAX_FRAMES_MODEL.
+        guidance_scale (float, optional): Controls adherence to the prompt. Higher values = more adherence.
+            Defaults to 3.5. Range: 0.0-20.0.
+        steps (int, optional): Number of inference steps. More steps = higher quality but slower.
+            Defaults to 28. Range: 1-40.
+        seed (int, optional): Random seed for reproducible results. Defaults to 42.
+            Range: 0 to MAX_SEED (2147483647).
+        randomize_seed (bool, optional): Whether to use a random seed instead of the provided seed.
+            Defaults to False.
+        progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
+
+    Returns:
+        tuple: A tuple containing:
+            - video_path (str): Path to the generated video file (.mp4)
+            - current_seed (int): The seed used for generation (useful when randomize_seed=True)
+
+    Raises:
+        gr.Error: If input_image is None (no image uploaded).
 
+    Note:
+        - The function automatically resizes the input image to the target dimensions
+        - Output dimensions are adjusted to be multiples of MOD_VALUE (32)
+        - The function uses GPU acceleration via the @spaces.GPU decorator
+    """
     if input_image is None:
         raise gr.Error("Please upload an input image.")
 
-    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
 
@@ -134,20 +166,20 @@ def generate_video(
     return video_path, current_seed
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Wan2.2-T2V-A14B AND I2V Testing")
-    #gr.Markdown("[CausVid](https://github.com/tianweiy/CausVid) is a distilled version of Wan 2.1 to run faster in just 4-8 steps, [extracted as LoRA by Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors) and is compatible with 🧨 diffusers")
+    gr.Markdown("# Fast 4 steps Wan 2.1 I2V (14B) with CausVid LoRA")
+    gr.Markdown("[CausVid](https://github.com/tianweiy/CausVid) is a distilled version of Wan 2.1 to run faster in just 4-8 steps, [extracted as LoRA by Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors) and is compatible with 🧨 diffusers")
     with gr.Row():
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image (auto-resized to target H/W)")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=MAX_DURATION, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
+            num_frames_input = gr.Slider(minimum=MIN_FRAMES_MODEL, maximum=MAX_FRAMES_MODEL, step=1, value=MAX_FRAMES_MODEL, label="Frames")
 
             with gr.Accordion("Advanced Settings", open=False):
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=35, label="Inference Steps")
-                guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)
+                steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=28, label="Inference Steps")
+                guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale")
 
             generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
@@ -155,7 +187,7 @@ with gr.Blocks() as demo:
 
     ui_inputs = [
         input_image_component, prompt_input,
-        negative_prompt_input, duration_seconds_input,
+        negative_prompt_input, num_frames_input,
         guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
     ]
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
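For context on the main change: the old UI exposed a seconds slider and derived the frame count via the removed expression np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL), while the new UI passes the frame count straight through to the pipeline. A minimal sketch of the equivalence, using only the constants and the removed expression from app.py:

    import numpy as np

    FIXED_FPS = 24          # fixed output frame rate (from app.py)
    MIN_FRAMES_MODEL = 8    # model's frame-count bounds (from app.py)
    MAX_FRAMES_MODEL = 81

    # Old behavior: convert the seconds slider value to frames, then clamp.
    def frames_from_duration(duration_seconds: float) -> int:
        return int(np.clip(int(round(duration_seconds * FIXED_FPS)),
                           MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

    # The removed slider bounds, in seconds:
    MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)  # 0.3
    MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)  # 3.4

    # The old extremes land exactly on the new slider's integer endpoints:
    assert frames_from_duration(MIN_DURATION) == MIN_FRAMES_MODEL  # 0.3 s -> 8 frames
    assert frames_from_duration(MAX_DURATION) == MAX_FRAMES_MODEL  # 3.4 s -> 81 frames

Because num_frames_input occupies duration_seconds_input's old slot in ui_inputs, the slider value now feeds the renamed num_frames parameter of generate_video positionally, and no clamping step is needed: the slider's own range (8 to 81, step 1) already enforces the model's limits.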
 
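The same parameter rename is applied to get_duration, whose body is outside this diff. On ZeroGPU Spaces such a helper is commonly passed as @spaces.GPU(duration=get_duration) and must then mirror the decorated function's signature, which would explain why it is updated in lockstep with generate_video. Since the body is not shown, the sketch below is a hypothetical budget function, not the app's actual logic; the trailing parameters and the per-step cost are assumptions:

    import gradio as gr

    # Hypothetical sketch: only get_duration's leading parameters appear in this diff.
    def get_duration(input_image, prompt, negative_prompt, num_frames,
                     guidance_scale, steps, seed, randomize_seed,
                     progress=gr.Progress(track_tqdm=True)):
        # Assumed scaling: reserve more ZeroGPU time when more inference
        # steps (and therefore more transformer passes) are requested.
        seconds_per_step = 2.0  # assumed per-step cost, not from app.py
        return int(steps * seconds_per_step)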