import streamlit as st
import torch
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
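
# Streamlit app: upload an image and a prompt, then generate a short video with
# CogVideoX1.5-5B-I2V. Run with `streamlit run <this_file>.py` (file name not
# fixed here). A CUDA GPU is assumed for the generator below; sequential CPU
# offload and VAE tiling/slicing are enabled to keep peak VRAM manageable.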

# Debug: App started
st.write("App started.")

# Streamlit interface
st.title("Image to Video with Hugging Face")
st.write("Upload an image and provide a prompt to generate a video.")

# Debug: Waiting for user inputs
st.write("Waiting for image upload and prompt input...")

# File uploader for the input image
uploaded_file = st.file_uploader("Upload an image (JPG or PNG):", type=["jpg", "jpeg", "png"])
prompt = st.text_input("Enter your prompt:", "A little girl is riding a bicycle at high speed. Focused, detailed, realistic.")

if uploaded_file and prompt:
    try:
        # Debug: File and prompt received
        st.write(f"Uploaded file: {uploaded_file.name}")
        st.write(f"Prompt: {prompt}")

        # Save the uploaded file to disk (the .jpg name is nominal; load_image
        # detects the actual format from the file contents, so PNG uploads work too)
        st.write("Saving uploaded image...")
        with open("uploaded_image.jpg", "wb") as f:
            f.write(uploaded_file.read())
        st.write("Uploaded image saved successfully.")

        # Load the image
        st.write("Loading image...")
        image = load_image("uploaded_image.jpg")
        st.write("Image loaded successfully.")

        # Initialize the CogVideoX pipeline
        st.write("Initializing the pipeline...")
        try:
            pipe = CogVideoXImageToVideoPipeline.from_pretrained(
                "THUDM/CogVideoX1.5-5B-I2V",
                torch_dtype=torch.bfloat16
            )
            st.write("Pipeline initialized successfully.")
        except Exception as e:
            st.error("Error during pipeline initialization.")
            st.write(f"Debug info: {e}")
            raise e

        # Enable memory optimizations for this large model: sequential CPU offload
        # keeps weights on the CPU and moves them to the GPU layer by layer, and
        # VAE tiling/slicing reduce peak memory during the decoding step.
        pipe.enable_sequential_cpu_offload()
        pipe.vae.enable_tiling()
        pipe.vae.enable_slicing()

        # Debug: Ready to generate video
        st.write("Pipeline setup complete. Ready to generate video.")

        # Generate the video
        st.write("Generating video... This may take a while.")
        video_frames = pipe(
            prompt=prompt,
            image=image,
            num_videos_per_prompt=1,
            num_inference_steps=50,
            num_frames=81,
            guidance_scale=6,
            # Fixed seed for reproducibility; assumes a CUDA device is available
            generator=torch.Generator(device="cuda").manual_seed(42),
        ).frames[0]
        st.write("Video generated successfully.")

        # Export video (81 frames at 8 fps is roughly a 10-second clip)
        st.write("Exporting video...")
        video_path = "output.mp4"
        export_to_video(video_frames, video_path, fps=8)
        st.write("Video exported successfully.")

        # Display video in Streamlit
        st.video(video_path)

    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.write(f"Debug info: {e}")
else:
    # Debug: Waiting for inputs
    st.write("Please upload an image and provide a prompt to get started.")