File size: 2,929 Bytes
1e350c6
 
0f09deb
 
1e350c6
 
673c700
f9a94e1
1e350c6
f9a94e1
 
673c700
 
 
 
 
 
1e350c6
 
 
dc76dee
e8b2b98
 
 
 
 
 
 
3128357
e8b2b98
1e350c6
 
f9a94e1
 
 
 
 
1e350c6
 
f9a94e1
1e350c6
 
f9a94e1
dc76dee
 
1e350c6
dc76dee
1e350c6
dc76dee
 
 
 
 
 
 
 
 
1e350c6
 
 
 
dc76dee
f9a94e1
dc76dee
 
 
 
 
 
 
 
 
 
1e350c6
f9a94e1
 
dc76dee
 
 
1e350c6
dc76dee
1e350c6
0f09deb
1e350c6
 
f9a94e1
1e350c6
 
f9a94e1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import streamlit as st
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
import torch

# Debug: App started
st.write("App started.")

# Streamlit interface
st.title("Image to Video with Hugging Face")
st.write("Upload an image and provide a prompt to generate a video.")

# File uploader for the input image and the text prompt for generation.
uploaded_file = st.file_uploader("Upload an image (JPG or PNG):", type=["jpg", "jpeg", "png"])
prompt = st.text_input("Enter your prompt:", "A little girl is riding a bicycle at high speed. Focused, detailed, realistic.")

# One-time Hugging Face cache-layout migration (best effort).
# move_cache() upgrades the old transformers cache layout; failure is
# non-fatal, so warn and continue rather than abort the app.
try:
    from transformers.utils import move_cache
    move_cache()
except Exception as e:
    st.warning(f"Cache migration failed: {e}. Proceeding without cache migration.")


@st.cache_resource
def _load_pipeline():
    """Load the CogVideoX image-to-video pipeline once per server process.

    Wrapped in ``st.cache_resource`` because Streamlit re-executes this
    script on every widget interaction; without caching, the multi-GB
    model would be re-instantiated on every rerun.

    Returns:
        A ready-to-use ``CogVideoXImageToVideoPipeline`` with CPU offload
        and VAE tiling/slicing enabled.
    """
    pipe = CogVideoXImageToVideoPipeline.from_pretrained(
        "THUDM/CogVideoX1.5-5B-I2V",
        torch_dtype=torch.bfloat16,
        cache_dir="./huggingface_cache",
        # BUG FIX: removed force_download=True — it re-downloaded the full
        # model weights from the Hub on every single app start.
    )
    # Memory optimizations so generation fits on smaller GPUs:
    # stream weights from CPU and tile/slice the VAE decode.
    pipe.enable_sequential_cpu_offload()
    pipe.vae.enable_tiling()
    pipe.vae.enable_slicing()
    return pipe


if uploaded_file and prompt:
    try:
        st.write(f"Uploaded file: {uploaded_file.name}")
        st.write(f"Prompt: {prompt}")

        # Persist the upload under its original extension so the name on
        # disk matches the actual file format (the original always wrote
        # ".jpg", even for PNG uploads).
        ext = os.path.splitext(uploaded_file.name)[1].lower() or ".jpg"
        image_path = f"uploaded_image{ext}"
        st.write("Saving uploaded image...")
        with open(image_path, "wb") as f:
            f.write(uploaded_file.read())
        st.write("Uploaded image saved successfully.")

        # Load the image
        st.write("Loading image...")
        image = load_image(image_path)
        st.write("Image loaded successfully.")

        # Initialize (or fetch the cached) pipeline
        st.write("Initializing the pipeline...")
        pipe = _load_pipeline()
        st.write("Pipeline initialized successfully.")

        # BUG FIX: the original unconditionally built a CUDA generator,
        # which raises on CPU-only hosts — fall back to CPU when needed.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(device=device).manual_seed(42)

        # Generate video
        st.write("Generating video... This may take a while.")
        video_frames = pipe(
            prompt=prompt,
            image=image,
            num_videos_per_prompt=1,
            num_inference_steps=50,
            num_frames=81,
            guidance_scale=6,
            generator=generator,
        ).frames[0]
        st.write("Video generated successfully.")

        # Export video
        st.write("Exporting video...")
        video_path = "output.mp4"
        export_to_video(video_frames, video_path, fps=8)
        st.write("Video exported successfully.")

        # Display video
        st.video(video_path)

    except Exception as e:
        # Top-level boundary: surface the failure in the UI instead of a
        # blank page. Streamlit logs the full traceback server-side.
        st.error(f"An error occurred: {e}")
        st.write(f"Debug info: {e}")
else:
    st.write("Please upload an image and provide a prompt to get started.")