roll-ai committed
Commit ded78ff · verified · 1 Parent(s): 7e06d6b

Update app.py

Files changed (1)
  1. app.py +40 -44
app.py CHANGED
@@ -30,57 +30,53 @@ def download_weights():
     print(f"✅ Already exists: {save_path}")
 
 download_weights()
+import gradio as gr
+import torch
+import os
+from inference_script import generate_video  # Assuming your script is saved as inference_script.py
 
-# =========================================
-# 2. Import and load FloVD pipeline
-# =========================================
-
-from inference.flovd_demo import load_pipeline, generate_video
-
-pipeline = load_pipeline(
-    fvsm_path="ckpt/FVSM/FloVD_FVSM_Controlnet.pt",
-    omsm_path="ckpt/OMSM",
-    depth_path="ckpt/others/depth_anything_v2_metric_hypersim_vitb.pth",
-    device="cuda" if torch.cuda.is_available() else "cpu"
-)
-
-# =========================================
-# 3. Inference Function
-# =========================================
+def run_inference(prompt, image, pose_type, speed, use_flow_integration, cam_pose_name):
+    os.makedirs("input_images", exist_ok=True)
+    image_path = "input_images/input_image.png"
+    image.save(image_path)
 
-def run_inference(image: Image.Image, prompt: str, cam_traj_path: str):
-    print("🚀 Running inference...")
-    output_path = generate_video(
-        image=image,
+    generate_video(
         prompt=prompt,
-        cam_traj=cam_traj_path,
-        pipeline=pipeline,
+        image_path=image_path,
+        fvsm_path="./ckpt/FVSM",  # Expected to be downloaded from HF dataset
+        omsm_path="./ckpt/OMSM",  # Expected to be downloaded from HF dataset
+        output_path="./outputs",
         num_frames=49,
         fps=16,
+        width=None,
+        height=None,
+        seed=42,
+        guidance_scale=6.0,
+        dtype=torch.float16,
         controlnet_guidance_end=0.4,
-        flow_scale=(60, 36)
+        use_dynamic_cfg=False,
+        pose_type=pose_type,
+        speed=float(speed),
+        use_flow_integration=use_flow_integration,
+        cam_pose_name=cam_pose_name,
+        depth_ckpt_path="./ckpt/others/depth_anything_v2_metric_hypersim_vitb.pth"
     )
-    return output_path
-
-# =========================================
-# 4. Gradio UI
-# =========================================
+    return f"./outputs/generated_videos/{prompt[:30].strip().replace(' ', '_')}_{cam_pose_name or 'default'}.mp4"
 
-example_image = "assets/manual_poses/example_image.jpg"
-example_cam = "assets/cam_trajectory/dolly_zoom.txt"
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎥 FloVD: Optical Flow + CogVideoX Video Generation")
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(label="Prompt", value="A girl riding a bicycle through a park.")
+            image = gr.Image(type="pil", label="Input Image")
+            pose_type = gr.Radio(choices=["manual", "re10k"], value="manual", label="Camera Pose Type")
+            cam_pose_name = gr.Textbox(label="Camera Trajectory Name", placeholder="e.g. zoom_in, tilt_up")
+            speed = gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.5, label="Speed")
+            use_flow_integration = gr.Checkbox(label="Use Flow Integration", value=False)
+            submit = gr.Button("Generate Video")
+        with gr.Column():
+            output_video = gr.Video(label="Generated Video")
 
-demo = gr.Interface(
-    fn=run_inference,
-    inputs=[
-        gr.Image(label="Input Image", type="pil"),
-        gr.Textbox(label="Text Prompt", value="A cinematic dolly zoom shot of a futuristic cityscape"),
-        gr.Textbox(label="Camera Trajectory File Path", value=example_cam),
-    ],
-    outputs=gr.Video(label="Generated Video"),
-    title="FloVD-CogVideoX 🌠",
-    description="Upload an image, enter a text prompt and a camera trajectory file path to generate a controlled video using CogVideoX + optical flow.",
-    examples=[[example_image, "A beautiful sunrise over a mountain range", example_cam]]
-)
+    submit.click(fn=run_inference, inputs=[prompt, image, pose_type, speed, use_flow_integration, cam_pose_name], outputs=output_video)
 
-if __name__ == "__main__":
-    demo.launch()
+demo.launch()
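
The new entry point imports generate_video from inference_script, whose definition is not part of this commit. As a rough sketch, here is a stub with the signature implied by the call site above; the parameter names and defaults are taken from the diff, the body is a placeholder rather than FloVD's actual pipeline, and it exists only to smoke-test the Gradio UI before the real script and checkpoints are wired up:

# Hypothetical stub of inference_script.generate_video, inferred from the
# call site in this commit; not FloVD's real implementation.
import os
import torch

def generate_video(prompt, image_path, fvsm_path, omsm_path, output_path,
                   num_frames=49, fps=16, width=None, height=None, seed=42,
                   guidance_scale=6.0, dtype=torch.float16,
                   controlnet_guidance_end=0.4, use_dynamic_cfg=False,
                   pose_type="manual", speed=0.5, use_flow_integration=False,
                   cam_pose_name=None, depth_ckpt_path=None):
    # The real script is expected to load the FVSM/OMSM checkpoints and run
    # the CogVideoX + optical-flow pipeline; here we only create the file
    # that run_inference's return path points at.
    video_dir = os.path.join(output_path, "generated_videos")
    os.makedirs(video_dir, exist_ok=True)
    name = f"{prompt[:30].strip().replace(' ', '_')}_{cam_pose_name or 'default'}.mp4"
    open(os.path.join(video_dir, name), "wb").close()  # empty placeholder video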
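
Note also that run_inference does not receive the output path from generate_video; it rebuilds the filename from the first 30 characters of the prompt. A quick worked example with the UI's default prompt, assuming cam_pose_name is set to zoom_in:

prompt = "A girl riding a bicycle through a park."
cam_pose_name = "zoom_in"
path = f"./outputs/generated_videos/{prompt[:30].strip().replace(' ', '_')}_{cam_pose_name or 'default'}.mp4"
print(path)  # ./outputs/generated_videos/A_girl_riding_a_bicycle_throug_zoom_in.mp4

This return value only resolves to a real file if generate_video saves its output under exactly the same naming scheme, so the two sides must stay in sync.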