Spaces:
Paused
Paused
test gradio
Browse files
app.py
CHANGED
@@ -10,6 +10,10 @@ from diffusers.schedulers import DPMSolverMultistepScheduler
|
|
10 |
from diffusers.utils import export_to_gif, load_image
|
11 |
from diffusers import AutoPipelineForText2Image
|
12 |
import openai,json
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
token = os.getenv("HF_TOKEN")
|
@@ -52,78 +56,49 @@ def generate_image(prompt, reference_image, controlnet_conditioning_scale):
|
|
52 |
|
53 |
return image
|
54 |
|
55 |
-
model_id = "stabilityai/stable-diffusion-3.5-large"
|
56 |
-
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-3"
|
57 |
-
controlnet_id = "guoyww/animatediff-sparsectrl-rgb"
|
58 |
-
lora_adapter_id = "guoyww/animatediff-motion-lora-v1-5-3"
|
59 |
vae_id = "stabilityai/sd-vae-ft-mse"
|
60 |
device = "cuda"
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
66 |
model_id,
|
67 |
subfolder="scheduler",
|
|
|
|
|
68 |
beta_schedule="linear",
|
69 |
-
|
70 |
-
use_karras_sigmas=True,
|
71 |
)
|
72 |
-
gif_pipe =
|
73 |
model_id,
|
74 |
-
motion_adapter=
|
75 |
-
controlnet=controlnet,
|
76 |
-
vae=vae,
|
77 |
scheduler=scheduler,
|
78 |
torch_dtype=torch.float16,
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
|
83 |
@spaces.GPU
|
84 |
def generate_gif(prompt, reference_image, controlnet_conditioning_scale,style_conditioning_scale,num_frames):
|
85 |
-
|
86 |
-
You are a scene designer tasked with creating sparse frames of a video. You will be given a prompt describing the desired video, and your goal is to design only the key frames (sparse frames) that represent major changes in the scene. Do not include repetitive or similar scenes—only capture distinct moments.
|
87 |
-
|
88 |
-
Expected Format:
|
89 |
-
Return the response as a JSON object with the key "frames". The value should be a list of dictionaries, where each dictionary has:
|
90 |
-
|
91 |
-
"frame_index": an integer indicating the frame's position in the sequence.
|
92 |
-
"description": a brief description of the scene in this frame.
|
93 |
-
Example:
|
94 |
-
If given a prompt like "A sunset over a beach with waves crashing and a ship sailing by," your response should look like this:
|
95 |
-
|
96 |
-
```json
|
97 |
-
{
|
98 |
-
"frames": [
|
99 |
-
{"frame_index": 0, "description": "Sunset over an empty beach, sky turning orange and pink"},
|
100 |
-
{"frame_index": 30, "description": "Waves gently crashing on the shore"},
|
101 |
-
{"frame_index": 60, "description": "A ship appears on the horizon, silhouetted by the sunset"},
|
102 |
-
{"frame_index": 90, "description": "Ship sailing closer, with waves becoming more dynamic"},
|
103 |
-
{"frame_index": 120, "description": "Sun dipping below the horizon, casting a golden glow over the water"}
|
104 |
-
]
|
105 |
-
}
|
106 |
-
```
|
107 |
-
This way, each frame represents a distinct scene, and there’s no redundancy between them."""},
|
108 |
-
{"role": "user", "content": f"give me the frames to generate a video with prompt : `{prompt}`"},]
|
109 |
-
frames = ask_gpt(massage_history,return_str=False)['frames']
|
110 |
-
conditioning_frames = []
|
111 |
-
controlnet_frame_indices =[]
|
112 |
-
frames = frames[0:1]
|
113 |
-
for frame in frames:
|
114 |
-
conditioning_frames.append(generate_image(frame['description'], reference_image, float(style_conditioning_scale)))
|
115 |
-
controlnet_frame_indices.append(frame['frame_index'])
|
116 |
-
yield (conditioning_frames, "output.gif")
|
117 |
-
|
118 |
video = gif_pipe(
|
119 |
prompt=prompt,
|
120 |
negative_prompt="low quality, worst quality",
|
121 |
num_inference_steps=25,
|
122 |
-
|
123 |
-
controlnet_frame_indices=controlnet_frame_indices,
|
124 |
-
controlnet_conditioning_scale=float(controlnet_conditioning_scale),
|
125 |
num_frames=int(num_frames)
|
126 |
).frames[0]
|
|
|
127 |
export_to_gif(video, "output.gif")
|
128 |
|
129 |
yield (conditioning_frames, "output.gif")
|
|
|
10 |
from diffusers.utils import export_to_gif, load_image
|
11 |
from diffusers import AutoPipelineForText2Image
|
12 |
import openai,json
|
13 |
+
import torch
|
14 |
+
from diffusers.models import MotionAdapter
|
15 |
+
from diffusers import AnimateDiffSDXLPipeline, DDIMScheduler
|
16 |
+
from diffusers.utils import export_to_gif
|
17 |
|
18 |
|
19 |
token = os.getenv("HF_TOKEN")
|
|
|
56 |
|
57 |
return image
|
58 |
|
|
|
|
|
|
|
|
|
59 |
vae_id = "stabilityai/sd-vae-ft-mse"
|
60 |
device = "cuda"
|
61 |
|
62 |
+
adapter = MotionAdapter.from_pretrained(
|
63 |
+
"a-r-r-o-w/animatediff-motion-adapter-sdxl-beta", torch_dtype=torch.float16
|
64 |
+
)
|
65 |
+
|
66 |
+
model_id = "stabilityai/sdxl-turbo"
|
67 |
+
scheduler = DDIMScheduler.from_pretrained(
|
68 |
model_id,
|
69 |
subfolder="scheduler",
|
70 |
+
clip_sample=False,
|
71 |
+
timestep_spacing="linspace",
|
72 |
beta_schedule="linear",
|
73 |
+
steps_offset=1,
|
|
|
74 |
)
|
75 |
+
gif_pipe = AnimateDiffSDXLPipeline.from_pretrained(
|
76 |
model_id,
|
77 |
+
motion_adapter=adapter,
|
|
|
|
|
78 |
scheduler=scheduler,
|
79 |
torch_dtype=torch.float16,
|
80 |
+
variant="fp16",
|
81 |
+
).to("cuda")
|
82 |
+
|
83 |
+
# enable memory savings
|
84 |
+
gif_pipe.enable_vae_slicing()
|
85 |
+
gif_pipe.enable_vae_tiling()
|
86 |
+
|
87 |
+
|
88 |
+
gif_pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
|
89 |
|
90 |
|
91 |
@spaces.GPU
|
92 |
def generate_gif(prompt, reference_image, controlnet_conditioning_scale,style_conditioning_scale,num_frames):
|
93 |
+
image= generate_image(prompt, reference_image, float(style_conditioning_scale))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
video = gif_pipe(
|
95 |
prompt=prompt,
|
96 |
negative_prompt="low quality, worst quality",
|
97 |
num_inference_steps=25,
|
98 |
+
guidance_scale=8,
|
|
|
|
|
99 |
num_frames=int(num_frames)
|
100 |
).frames[0]
|
101 |
+
|
102 |
export_to_gif(video, "output.gif")
|
103 |
|
104 |
yield (conditioning_frames, "output.gif")
|