import gradio as gr
import numpy as np
import tempfile
import imageio
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
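# Load the Zeroscope v2 576w text-to-video pipeline in half precision,
# swap in the DPM-Solver++ multistep scheduler, and offload weights to
# CPU between forward passes to reduce peak VRAM usage.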
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

def create_image_caption(image_init):
    # Query the CoCa captioning Space to describe the uploaded image;
    # the caption becomes the video-generation prompt. Note that the
    # Space client is re-created on every call.
    caption = gr.load(name="spaces/fffiloni/CoCa-clone")
    cap = caption(image_init, "Nucleus sampling", 1.2, 0.5, 5, 20, fn_index=0)
    print("cap: " + cap)
    return cap

def export_to_video(frames: np.ndarray, fps: int) -> str:
    # Convert float frames in [0, 1] to uint8 and write them to a
    # temporary MP4 file with imageio's FFMPEG writer.
    frames = np.clip((frames * 255), 0, 255).astype(np.uint8)
    out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    writer = imageio.get_writer(out_file.name, format="FFMPEG", fps=fps)
    for frame in frames:
        writer.append_data(frame)
    writer.close()
    return out_file.name

def infer(image_init):
    # Caption the image, generate a 24-frame 576x320 clip from the
    # caption, and export it at 12 fps.
    prompt = create_image_caption(image_init)
    video_frames = pipe(prompt, num_inference_steps=40, height=320, width=576, num_frames=24).frames[0]
    video_path = export_to_video(video_frames, 12)
    print(video_path)
    return prompt, video_path
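
# Custom CSS: centers the main column and styles the share-button widgets.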
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.animate-spin {
    animation: spin 1s linear infinite;
}
@keyframes spin {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
}
#share-btn-container {
    display: flex;
    padding-left: 0.5rem !important;
    padding-right: 0.5rem !important;
    background-color: #000000;
    justify-content: center;
    align-items: center;
    border-radius: 9999px !important;
    max-width: 13rem;
}
#share-btn-container:hover {
    background-color: #060606;
}
#share-btn {
    all: initial;
    color: #ffffff;
    font-weight: 600;
    cursor: pointer;
    font-family: 'IBM Plex Sans', sans-serif;
    margin-left: 0.5rem !important;
    padding-top: 0.5rem !important;
    padding-bottom: 0.5rem !important;
    right: 0;
}
#share-btn * {
    all: unset;
}
#share-btn-container div:nth-child(-n+2) {
    width: auto !important;
    min-height: 0px !important;
}
#share-btn-container .wrap {
    display: none !important;
}
#share-btn-container.hidden {
    display: none !important;
}
img[src*='#center'] {
    display: block;
    margin: auto;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            """
            <h1 style="text-align: center;">Zeroscope Image-to-Video</h1>
            <p style="text-align: center;">
              A watermark-free Modelscope-based video model optimized for producing high-quality 16:9 compositions and smooth video output. <br />
              This demo is a variation that lets you upload an image as a reference for video generation.
            </p>
            [Duplicate this Space](https://huggingface.co/spaces/fffiloni/zeroscope-img-to-video?duplicate=true)
            """
        )
        image_init = gr.Image(label="Image Init", type="filepath", sources=["upload"], elem_id="image-init")
        #inference_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=1, value=40, interactive=False)
        submit_btn = gr.Button("Submit")
        coca_cap = gr.Textbox(label="Caption", placeholder="CoCa Caption will be displayed here", elem_id="coca-cap-in")
        video_result = gr.Video(label="Video Output", elem_id="video-output")

        submit_btn.click(
            fn=infer,
            inputs=[image_init],
            outputs=[coca_cap, video_result],
            show_api=False
        )

demo.queue(max_size=12).launch(show_api=False)