amos1088 committed
Commit 9427817 · 1 Parent(s): d9642a7

test gradio

Files changed (1): app.py (+27 -52)
app.py CHANGED
@@ -10,6 +10,10 @@ from diffusers.schedulers import DPMSolverMultistepScheduler
 from diffusers.utils import export_to_gif, load_image
 from diffusers import AutoPipelineForText2Image
 import openai,json
+import torch
+from diffusers.models import MotionAdapter
+from diffusers import AnimateDiffSDXLPipeline, DDIMScheduler
+from diffusers.utils import export_to_gif
 
 
 token = os.getenv("HF_TOKEN")
@@ -52,78 +56,49 @@ def generate_image(prompt, reference_image, controlnet_conditioning_scale):
 
     return image
 
-model_id = "stabilityai/stable-diffusion-3.5-large"
-motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-3"
-controlnet_id = "guoyww/animatediff-sparsectrl-rgb"
-lora_adapter_id = "guoyww/animatediff-motion-lora-v1-5-3"
 vae_id = "stabilityai/sd-vae-ft-mse"
 device = "cuda"
 
-motion_adapter = MotionAdapter.from_pretrained(motion_adapter_id, torch_dtype=torch.float16).to(device)
-controlnet = SparseControlNetModel.from_pretrained(controlnet_id, torch_dtype=torch.float16).to(device)
-vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=torch.float16).to(device)
-scheduler = DPMSolverMultistepScheduler.from_pretrained(
+adapter = MotionAdapter.from_pretrained(
+    "a-r-r-o-w/animatediff-motion-adapter-sdxl-beta", torch_dtype=torch.float16
+)
+
+model_id = "stabilityai/sdxl-turbo"
+scheduler = DDIMScheduler.from_pretrained(
     model_id,
     subfolder="scheduler",
+    clip_sample=False,
+    timestep_spacing="linspace",
     beta_schedule="linear",
-    algorithm_type="dpmsolver++",
-    use_karras_sigmas=True,
+    steps_offset=1,
 )
-gif_pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
+gif_pipe = AnimateDiffSDXLPipeline.from_pretrained(
     model_id,
-    motion_adapter=motion_adapter,
-    controlnet=controlnet,
-    vae=vae,
+    motion_adapter=adapter,
     scheduler=scheduler,
     torch_dtype=torch.float16,
-).to(device)
-gif_pipe.load_lora_weights(lora_adapter_id, adapter_name="motion_lora")
+    variant="fp16",
+).to("cuda")
+
+# enable memory savings
+gif_pipe.enable_vae_slicing()
+gif_pipe.enable_vae_tiling()
+
+
+gif_pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 
 
 @spaces.GPU
 def generate_gif(prompt, reference_image, controlnet_conditioning_scale,style_conditioning_scale,num_frames):
-    massage_history = [{"role": "system", "content": """
-You are a scene designer tasked with creating sparse frames of a video. You will be given a prompt describing the desired video, and your goal is to design only the key frames (sparse frames) that represent major changes in the scene. Do not include repetitive or similar scenes—only capture distinct moments.
-
-Expected Format:
-Return the response as a JSON object with the key "frames". The value should be a list of dictionaries, where each dictionary has:
-
-"frame_index": an integer indicating the frame's position in the sequence.
-"description": a brief description of the scene in this frame.
-Example:
-If given a prompt like "A sunset over a beach with waves crashing and a ship sailing by," your response should look like this:
-
-```json
-{
-"frames": [
-{"frame_index": 0, "description": "Sunset over an empty beach, sky turning orange and pink"},
-{"frame_index": 30, "description": "Waves gently crashing on the shore"},
-{"frame_index": 60, "description": "A ship appears on the horizon, silhouetted by the sunset"},
-{"frame_index": 90, "description": "Ship sailing closer, with waves becoming more dynamic"},
-{"frame_index": 120, "description": "Sun dipping below the horizon, casting a golden glow over the water"}
-]
-}
-```
-This way, each frame represents a distinct scene, and there’s no redundancy between them."""},
-    {"role": "user", "content": f"give me the frames to generate a video with prompt : `{prompt}`"},]
-    frames = ask_gpt(massage_history,return_str=False)['frames']
-    conditioning_frames = []
-    controlnet_frame_indices =[]
-    frames = frames[0:1]
-    for frame in frames:
-        conditioning_frames.append(generate_image(frame['description'], reference_image, float(style_conditioning_scale)))
-        controlnet_frame_indices.append(frame['frame_index'])
-    yield (conditioning_frames, "output.gif")
-
+    image= generate_image(prompt, reference_image, float(style_conditioning_scale))
     video = gif_pipe(
         prompt=prompt,
         negative_prompt="low quality, worst quality",
         num_inference_steps=25,
-        conditioning_frames=conditioning_frames,
-        controlnet_frame_indices=controlnet_frame_indices,
-        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
+        guidance_scale=8,
         num_frames=int(num_frames)
     ).frames[0]
+
     export_to_gif(video, "output.gif")
 
     yield (conditioning_frames, "output.gif")
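Two issues in the rewritten `generate_gif` are worth flagging. First, the function still ends with `yield (conditioning_frames, "output.gif")`, but `conditioning_frames` was only defined in the deleted SparseControlNet path, so the new version raises a `NameError` as soon as the render finishes. Second, the style frame produced by `generate_image` is assigned to `image` but never passed to the pipeline, even though the commit now loads an IP-Adapter. A minimal repair sketch, assuming the intent is to route that image through `ip_adapter_image` (the standard diffusers keyword for IP-Adapter input; its use here is my assumption, not part of the commit) and to yield the single style frame where the old frame list used to go:

```python
@spaces.GPU
def generate_gif(prompt, reference_image, controlnet_conditioning_scale,
                 style_conditioning_scale, num_frames):
    # Reuses the module-level gif_pipe, generate_image, and export_to_gif
    # from app.py; this is a sketch of a fix, not the committed code.
    image = generate_image(prompt, reference_image, float(style_conditioning_scale))

    video = gif_pipe(
        prompt=prompt,
        negative_prompt="low quality, worst quality",
        num_inference_steps=25,
        guidance_scale=8,
        ip_adapter_image=image,  # assumption: feed the style frame to the loaded IP-Adapter
        num_frames=int(num_frames),
    ).frames[0]

    export_to_gif(video, "output.gif")

    # Yield the single conditioning image instead of the now-undefined
    # conditioning_frames list from the old SparseControlNet path.
    yield ([image], "output.gif")
```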
 
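Separately, `stabilityai/sdxl-turbo` is a few-step distilled checkpoint that is normally sampled with `guidance_scale=0.0` and 1-4 inference steps, so the `num_inference_steps=25, guidance_scale=8` settings carried over from the old pipeline may fight the base model. A variant closer to the turbo sampling regime, as a sketch only (its interaction with the AnimateDiff SDXL motion adapter is untested):

```python
# Assumes the same gif_pipe built in app.py above.
video = gif_pipe(
    prompt=prompt,
    num_inference_steps=4,   # turbo checkpoints are distilled for 1-4 steps
    guidance_scale=0.0,      # turbo is trained without classifier-free guidance
    num_frames=int(num_frames),
).frames[0]
```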