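# Hugging Face Space: text-to-animation demo using AnimateDiff with SparseControlNet.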
import gradio as gr
from huggingface_hub import login
import os
import spaces
import torch
from diffusers import AnimateDiffSparseControlNetPipeline
from diffusers.models import AutoencoderKL, MotionAdapter, SparseControlNetModel
from diffusers.schedulers import DPMSolverMultistepScheduler
from diffusers.utils import export_to_gif, load_image
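# Authenticate with the Hugging Face Hub; HF_TOKEN is expected as a Space secret.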
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)
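# Checkpoints: a Realistic Vision base model, the AnimateDiff motion adapter,
# the SparseControlNet RGB conditioning model, a motion LoRA, and a fine-tuned VAE.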
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-3"
controlnet_id = "guoyww/animatediff-sparsectrl-rgb"
lora_adapter_id = "guoyww/animatediff-motion-lora-v1-5-3"
vae_id = "stabilityai/sd-vae-ft-mse"
device = "cuda"
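# Load the individual components in half precision to reduce GPU memory use.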
motion_adapter = MotionAdapter.from_pretrained(motion_adapter_id, torch_dtype=torch.float16).to(device)
controlnet = SparseControlNetModel.from_pretrained(controlnet_id, torch_dtype=torch.float16).to(device)
vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
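# DPM-Solver++ with Karras sigmas gives good results in relatively few steps.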
scheduler = DPMSolverMultistepScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    beta_schedule="linear",
    algorithm_type="dpmsolver++",
    use_karras_sigmas=True,
)
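# Assemble the AnimateDiff SparseControlNet pipeline from the loaded components.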
pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
    model_id,
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to(device)
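# Attach the motion LoRA, which refines the motion produced by the adapter.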
pipe.load_lora_weights(lora_adapter_id, adapter_name="motion_lora")
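# On ZeroGPU Spaces, @spaces.GPU allocates a GPU only while this function runs.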
@spaces.GPU
def generate_image(prompt, reference_images, controlnet_conditioning_scale, num_frames):
    # Load every uploaded file as a conditioning image.
    style_images = [load_image(f) for f in reference_images]
    video = pipe(
        prompt=prompt,
        negative_prompt="low quality, worst quality",
        num_inference_steps=25,
        num_frames=int(num_frames),
        conditioning_frames=style_images,
        # One frame index per conditioning image; each index must stay below num_frames.
        controlnet_frame_indices=list(range(len(style_images))),
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        generator=torch.Generator().manual_seed(42),
    ).frames[0]
    export_to_gif(video, "output.gif")
    return "output.gif"
# Set up Gradio interface
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.File(type="filepath", file_count="multiple", label="Reference Image(s) (Style)"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0, maximum=1.0, step=0.1, value=1.0),
        # AnimateDiff checkpoints are typically trained on 16-frame clips.
        gr.Slider(label="Number of Frames", minimum=8, maximum=32, step=1, value=16),
    ],
    outputs=gr.Image(label="Generated Animation"),
    title="Animation Generation with AnimateDiff and SparseControlNet",
    description="Generates a GIF animation from a text prompt and reference image(s) using AnimateDiff with SparseControlNet on Realistic Vision V5.1.",
)
interface.launch()