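"""Streamlit app: upload an image and a text prompt, then generate a short
video with the CogVideoX1.5-5B image-to-video pipeline from diffusers."""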
import streamlit as st
import torch

try:
    from diffusers import CogVideoXImageToVideoPipeline
    from diffusers.utils import export_to_video, load_image
    pipeline_available = True
except ImportError as e:
    pipeline_available = False
    st.error("Failed to import CogVideoXImageToVideoPipeline.")
    st.write(f"Debug info: {e}")


# Streamlit interface
st.title("Image to Video with Hugging Face")
st.write("Upload an image and provide a prompt to generate a video.")

# Check if the pipeline is available before proceeding
if not pipeline_available:
    st.error("The required pipeline is unavailable. Please ensure you have the correct version of the diffusers library.")
else:
    # File uploader for the input image
    uploaded_file = st.file_uploader("Upload an image (JPG or PNG):", type=["jpg", "jpeg", "png"])
    prompt = st.text_input("Enter your prompt:", "A little girl is riding a bicycle at high speed. Focused, detailed, realistic.")

    # One-time migration of the old Hugging Face cache layout to the new format
    st.write("Migrating the cache for model files...")
    try:
        from transformers.utils import move_cache
        move_cache()
        st.write("Cache migration completed successfully.")
    except Exception as e:
        st.error(f"Cache migration failed: {e}")
        st.write("Proceeding without cache migration...")

    if uploaded_file and prompt:
        try:
            st.write(f"Uploaded file: {uploaded_file.name}")
            st.write(f"Prompt: {prompt}")

            # Save the uploaded bytes to disk (PIL reads the actual format from the
            # file contents, so the .jpg name also works for PNG uploads)
            st.write("Saving uploaded image...")
            with open("uploaded_image.jpg", "wb") as f:
                f.write(uploaded_file.read())
            st.write("Uploaded image saved successfully.")

            # Load the image as a PIL image for the pipeline
            st.write("Loading image...")
            image = load_image("uploaded_image.jpg")
            st.write("Image loaded successfully.")

            # Initialize the pipeline (downloads several GB of weights on first use)
            st.write("Initializing the pipeline...")
            pipe = CogVideoXImageToVideoPipeline.from_pretrained(
                "THUDM/CogVideoX1.5-5B-I2V",
                torch_dtype=torch.bfloat16,       # half-precision weights to reduce memory use
                cache_dir="./huggingface_cache",  # keep the downloaded weights in a local folder
                force_download=True,              # re-downloads the weights on every run; remove to reuse the cache
            )
            st.write("Pipeline initialized successfully.")

            # Memory optimizations: offload submodules to the CPU between forward passes
            # and decode the VAE in tiles/slices to lower peak GPU memory use
            pipe.enable_sequential_cpu_offload()
            pipe.vae.enable_tiling()
            pipe.vae.enable_slicing()

            # Run the diffusion pipeline: 50 denoising steps, 81 frames (about 10 s at the 8 fps used below)
            st.write("Generating video... This may take a while.")
            video_frames = pipe(
                prompt=prompt,
                image=image,
                num_videos_per_prompt=1,
                num_inference_steps=50,
                num_frames=81,
                guidance_scale=6,
                generator=torch.Generator(device="cuda").manual_seed(42),  # requires a CUDA GPU; fixed seed for reproducibility
            ).frames[0]
            st.write("Video generated successfully.")

            # Export the generated frames to an MP4 file
            st.write("Exporting video...")
            video_path = "output.mp4"
            export_to_video(video_frames, video_path, fps=8)
            st.write("Video exported successfully.")

            # Display video
            st.video(video_path)

        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.exception(e)  # show the full traceback for debugging
    else:
        st.write("Please upload an image and provide a prompt to get started.")