import os

import gradio as gr
import spaces
import torch
from diffusers import AnimateDiffSparseControlNetPipeline
from diffusers.models import AutoencoderKL, MotionAdapter, SparseControlNetModel
from diffusers.schedulers import DPMSolverMultistepScheduler
from diffusers.utils import export_to_gif, load_image
from huggingface_hub import login

# Log in to the Hugging Face Hub only if a token is provided via the environment.
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)


# Model components: Realistic Vision base, AnimateDiff motion adapter,
# SparseCtrl RGB ControlNet, motion LoRA, and the ft-MSE VAE.
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-3"
controlnet_id = "guoyww/animatediff-sparsectrl-rgb"
lora_adapter_id = "guoyww/animatediff-motion-lora-v1-5-3"
vae_id = "stabilityai/sd-vae-ft-mse"
device = "cuda"  # @spaces.GPU provides a CUDA device on ZeroGPU Spaces

# Load the motion adapter, SparseCtrl ControlNet, and fine-tuned VAE in fp16.
motion_adapter = MotionAdapter.from_pretrained(motion_adapter_id, torch_dtype=torch.float16).to(device)
controlnet = SparseControlNetModel.from_pretrained(controlnet_id, torch_dtype=torch.float16).to(device)
vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)

# DPM-Solver++ with Karras sigmas, configured from the base model's scheduler.
scheduler = DPMSolverMultistepScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    beta_schedule="linear",
    algorithm_type="dpmsolver++",
    use_karras_sigmas=True,
)

# Assemble the AnimateDiff SparseCtrl pipeline and attach the motion LoRA.
pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
    model_id,
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to(device)
pipe.load_lora_weights(lora_adapter_id, adapter_name="motion_lora")
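
# Optional memory savers (assumption: not part of the original Space; both are
# standard diffusers pipeline methods). Uncomment if VRAM is tight:
# pipe.enable_vae_slicing()
# pipe.enable_model_cpu_offload()  # replaces the .to(device) placement above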


@spaces.GPU
def generate_animation(prompt, reference_images, controlnet_conditioning_scale, num_frames):
    # gr.File(type="filepath") passes a list of file paths for multiple uploads.
    style_images = [load_image(path) for path in reference_images]

    video = pipe(
        prompt=prompt,
        negative_prompt="low quality, worst quality",
        num_inference_steps=25,
        num_frames=int(num_frames),
        conditioning_frames=style_images,
        # One sparse conditioning index per reference image, starting at frame 0.
        controlnet_frame_indices=list(range(len(style_images))),
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        generator=torch.Generator().manual_seed(42),
    ).frames[0]

    # Export the frames and return the same path that was written.
    output_path = "output.gif"
    export_to_gif(video, output_path)
    return output_path

# Set up the Gradio interface.
interface = gr.Interface(
    fn=generate_animation,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.File(type="filepath", file_count="multiple", label="Reference Images (Conditioning Frames)"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0, maximum=1.0, step=0.1, value=1.0),
        gr.Slider(label="Number of Frames", minimum=8, maximum=32, step=1, value=16),
    ],
    outputs=gr.Image(label="Animation (GIF)"),
    title="Animation Generation with AnimateDiff SparseCtrl",
    description="Generates a GIF animation from a text prompt and one or more reference images using AnimateDiff with a SparseControlNet on top of Realistic Vision V5.1.",
)

interface.launch()