import json
import os

import gradio as gr
import openai
import spaces
import torch
from huggingface_hub import login

from diffusers import AnimateDiffSDXLPipeline, AutoPipelineForText2Image, DDIMScheduler
from diffusers.models import MotionAdapter
from diffusers.utils import export_to_gif, load_image


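# Read the Hugging Face and OpenAI tokens from the Space's environment and authenticate.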
token = os.getenv("HF_TOKEN")
login(token=token)
openai_token = os.getenv("OPENAI_TOKEN")
openai.api_key = openai_token
openaiclient = openai.OpenAI(api_key=openai.api_key)

def ask_gpt(message_history, model="gpt-4o-mini", return_str=True, response_format={"type": "json_object"}):
    """Send a chat history to the OpenAI API and return the reply as a string or parsed JSON."""
    response = openaiclient.chat.completions.create(
        model=model,
        messages=message_history,
        response_format=response_format,
        max_tokens=4000,
    )

    if return_str:
        return response.choices[0].message.content
    return json.loads(response.choices[0].message.content)
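# Illustrative usage only; this helper is not called anywhere in the Gradio flow below.
# With response_format={"type": "json_object"} the prompt must explicitly ask for JSON, e.g.:
#   ask_gpt([{"role": "user", "content": "Describe this scene as JSON with a 'style' key."}],
#           return_str=False)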


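# SDXL Turbo text-to-image pipeline with an SDXL IP-Adapter for style-image conditioning.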
image_pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16).to("cuda")
image_pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")



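# Stage 1: generate a single style-conditioned still image from the prompt and reference images.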
@spaces.GPU
def generate_image(prompt, reference_image, ip_adapter_scale):
    # gr.File(type="filepath") hands the callback plain path strings, so load them directly.
    style_images = [load_image(f) for f in reference_image]

    # How strongly the reference style images steer the generation.
    image_pipeline.set_ip_adapter_scale(ip_adapter_scale)

    image = image_pipeline(
        prompt=prompt,
        ip_adapter_image=[style_images],
        negative_prompt="",
        guidance_scale=5,
        num_inference_steps=30,
    ).images[0]

    return image


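# Stage 2 setup: AnimateDiff SDXL pipeline that animates the prompt, guided by the stage-1
# image through the same SDXL IP-Adapter.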
adapter = MotionAdapter.from_pretrained(
    "a-r-r-o-w/animatediff-motion-adapter-sdxl-beta", torch_dtype=torch.float16
)

model_id = "stabilityai/sdxl-turbo"
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
gif_pipe = AnimateDiffSDXLPipeline.from_pretrained(
    model_id,
    motion_adapter=adapter,
    scheduler=scheduler,
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

# enable memory savings
gif_pipe.enable_vae_slicing()
gif_pipe.enable_vae_tiling()


gif_pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")


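# Two-stage GIF generation: first create a style-conditioned still with SDXL Turbo, then feed it
# to the AnimateDiff pipeline as an IP-Adapter image to produce the animation.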
@spaces.GPU
def generate_gif(prompt, reference_image, controlnet_conditioning_scale, style_conditioning_scale, num_frames):
    # NOTE: controlnet_conditioning_scale comes from the UI but is not applied anywhere yet.
    image = generate_image(prompt, reference_image, float(style_conditioning_scale))
    video = gif_pipe(
        prompt=prompt,
        ip_adapter_image=[image],
        negative_prompt="low quality, worst quality",
        num_inference_steps=25,
        guidance_scale=8,
        num_frames=int(num_frames),
    ).frames[0]

    export_to_gif(video, "output.gif")

    # Show the intermediate still in the gallery and the finished animation in the image output.
    yield ([image], "output.gif")

# Set up Gradio interface
interface = gr.Interface(
    fn=generate_gif,
    inputs=[
        gr.Textbox(label="Prompt"),
        # gr.Image( type= "filepath",label="Reference Image (Style)"),
        gr.File(type="filepath",file_count="multiple",label="Reference Image (Style)"),
        gr.Slider(label="Control Net Conditioning Scale", minimum=0, maximum=1.0, step=0.1, value=1.0),
        gr.Slider(label="Style Scale", minimum=0, maximum=1.0, step=0.1, value=0.6),
        gr.Slider(label="Number of frames", minimum=0, maximum=100.0, step=1.0, value=10.0),

    ],
    outputs=["gallery","image"],
    title="Image Generation with Stable Diffusion 3 medium and ControlNet",
    description="Generates an image based on a text prompt and a reference image using Stable Diffusion 3 medium with ControlNet."

)

interface.launch()