Sergidev committed
Commit 0250a5e · 1 Parent(s): eb9cf96
Files changed (1)
  1. demo_app.py +334 -102
demo_app.py CHANGED
@@ -1,121 +1,353 @@
  import spaces
  import gradio as gr
  import numpy as np
- import torch
- from diffusers import HunyuanVideoPipeline
  from huggingface_hub import snapshot_download
  from PIL import Image
- import os

  # Configuration
- LORA_CHOICES = [
-     "Top_Off.safetensors",
-     "huanyan_helper.safetensors",
-     "huanyan_helper_alpha.safetensors",
-     "hunyuan-t-solo-v1.0.safetensors",
-     "stripe_v2.safetensors"
- ]

- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024

- # Initialize pipeline with ZeroGPU optimizations
- model_id = "Tencent-Hunyuan/Hunyuan-Video-Lite"
  pipe = HunyuanVideoPipeline.from_pretrained(
-     model_id,
      torch_dtype=torch.float16
  ).to("cuda")

- # Load LoRA adapters
- for lora_file in LORA_CHOICES:
-     pipe.load_lora_weights(
-         "Sergidev/TTV4ME",
-         weight_name=lora_file,
-         adapter_name=lora_file.split('.')[0],
-         token=os.environ.get("HF_TOKEN")
-     )

  @spaces.GPU(duration=300)
- def generate(prompt, image_input, height, width, num_frames,
-              num_inference_steps, seed_value, fps, selected_loras, lora_weights):
-     # Image validation
-     if image_input is not None:
-         img = Image.open(image_input)
-         if img.size != (width, height):
-             raise gr.Error(f"Image resolution {img.size} must match video resolution {width}x{height}")
-         prompt = f"Image prompt: {prompt}" if prompt else "Based on uploaded image"
-
-     # Set active LoRAs
-     active_adapters = []
-     adapter_weights = []
-     for idx, selected in enumerate(selected_loras):
-         if selected:
-             active_adapters.append(LORA_CHOICES[idx].split('.')[0])
-             adapter_weights.append(lora_weights[idx])
-
-     pipe.set_adapters(active_adapters, adapter_weights)
-
-     # Generate video
-     generator = torch.Generator('cuda').manual_seed(seed_value if seed_value != -1 else torch.seed())
-
-     if image_input:
-         output = pipe.image_to_video(
-             Image.open(image_input).convert("RGB"),
-             prompt=prompt,
-             height=height,
-             width=width,
-             num_frames=num_frames,
-             num_inference_steps=num_inference_steps,
-             generator=generator,
-         )
-     else:
-         output = pipe.text_to_video(
-             prompt=prompt,
-             height=height,
-             width=width,
-             num_frames=num_frames,
-             num_inference_steps=num_inference_steps,
-             generator=generator,
-         )

-     return output.frames[0]

- with gr.Blocks(theme="dark") as demo:
-     with gr.Column():
-         gr.Markdown("# 🎬 Hunyuan Studio")

-         with gr.Row():
-             with gr.Column():
-                 prompt = gr.Textbox(label="Prompt")
-                 image_input = gr.Image(label="Input Image", type="filepath")
-
-                 with gr.Accordion("Advanced Settings"):
-                     resolution = gr.Dropdown(
-                         choices=["512x512", "768x768", "1024x1024"],
-                         value="512x512",
-                         label="Output Resolution"
                      )
-                     seed = gr.Slider(-1, MAX_SEED, value=-1, label="Seed")
-                     num_frames = gr.Slider(1, 257, 24, label="Frame Count")
-                     num_inference_steps = gr.Slider(1, 50, 25, label="Inference Steps")
-                     fps = gr.Slider(1, 60, 12, label="FPS")
-
-                 with gr.Accordion("LoRA Configuration"):
-                     lora_components = []
-                     for lora in LORA_CHOICES:
-                         lora_components.append(gr.Checkbox(label=f"Enable {lora}"))
-                         lora_components.append(gr.Slider(0.0, 1.0, 0.8, label=f"{lora} Weight"))
-
-                 generate_btn = gr.Button("Generate Video")
-
-             with gr.Column():
-                 output_video = gr.Video(label="Result")
-
-     generate_btn.click(
-         fn=generate,
-         inputs=[prompt, image_input,
-                 gr.Number(512), gr.Number(512),  # Height/width from resolution
-                 num_frames, num_inference_steps, seed, fps,
-                 *lora_components],
-         outputs=output_video
-     )
  import spaces
+ import gc
  import gradio as gr
  import numpy as np
+ import os
+ from pathlib import Path
+ from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+ from diffusers.utils import export_to_video
  from huggingface_hub import snapshot_download
+ import torch
  from PIL import Image

  # Configuration
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.set_grad_enabled(False)
+ torch.backends.cudnn.deterministic = True
+ torch.backends.cudnn.benchmark = False

+ # Load base model
+ model_id = "hunyuanvideo-community/HunyuanVideo"
+ base_path = f"/home/user/app/{model_id}"
+ os.makedirs(base_path, exist_ok=True)
+ snapshot_download(repo_id=model_id, local_dir=base_path)

+ # Load transformer
+ ckp_path = Path(base_path)
+ gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
+ transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
+ transformer = HunyuanVideoTransformer3DModel.from_single_file(
+     transformer_path,
+     quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
+     torch_dtype=torch.bfloat16,
+ ).to('cuda')
+
+ # Initialize pipeline
  pipe = HunyuanVideoPipeline.from_pretrained(
+     ckp_path,
+     transformer=transformer,
      torch_dtype=torch.float16
  ).to("cuda")

+ # Configure VAE
+ pipe.vae.enable_tiling()
+ pipe.vae.enable_slicing()
+ pipe.vae.eval()
+
+ # Available LoRAs in the TTV4ME repository
+ TTV4ME_Loras = {
+     "Top_Off.safetensors": "Top_Off.safetensors",
+     "huanyan_helper.safetensors": "huanyan_helper.safetensors",
+     "huanyan_helper_alpha.safetensors": "huanyan_helper_alpha.safetensors",
+     "hunyuan-t-solo-v1.0.safetensors": "hunyuan-t-solo-v1.0.safetensors",
+     "stripe_v2.safetensors": "stripe_v2.safetensors"
+ }
+
+ # Illustration LoRA
+ ILLUSTRATION_LORA = "sergidev/IllustrationTTV"
+ ILLUSTRATION_LORA_NAME = "hunyuan_flat_color_v2.safetensors"
+ ILLUSTRATION_ADAPTER_NAME = "hyvid_lora_adapter"
+
+ # Load default LoRA adapters
+ pipe.load_lora_weights(
+     "Sergidev/TTV4ME",  # Private repository
+     weight_name="stripe_v2.safetensors",
+     adapter_name="hunyuanvideo-lora",
+     token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+ )
+
+ pipe.load_lora_weights(
+     "sergidev/IllustrationTTV",
+     weight_name="hunyuan_flat_color_v2.safetensors",
+     adapter_name="hyvid_lora_adapter"
+ )
+
+ # Set combined adapter weights
+ pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
+
+ # Memory cleanup
+ gc.collect()
+ torch.cuda.empty_cache()
+
+ MAX_SEED = np.iinfo(np.int32).max
+ MAX_IMAGE_SIZE = 1024
+

  @spaces.GPU(duration=300)
+ def generate(
+     prompt,
+     uploaded_image,
+     height,
+     width,
+     num_frames,
+     num_inference_steps,
+     seed_value,
+     fps,
+     lora_names,
+     lora_weights,
+     progress=gr.Progress(track_tqdm=True)
+ ):
+     with torch.cuda.device(0):
+         if seed_value == -1:
+             seed_value = torch.randint(0, MAX_SEED, (1,)).item()
+         generator = torch.Generator('cuda').manual_seed(seed_value)

+         # Handle image input
+         if uploaded_image:
+             init_image = Image.open(uploaded_image).convert("RGB").resize((width, height))
+             if init_image.size != (width, height):
+                 raise gr.Error("Uploaded image resolution must match specified width and height.")
+         else:
+             init_image = None

+         # Configure LoRA adapters
+         adapter_names = ["hyvid_lora_adapter"]  # Always include the illustration LoRA
+         adapter_weights = [0.8]  # Illustration LoRA weight
+         for i, lora_name in enumerate(lora_names):
+             if lora_name != "None":
+                 adapter_names.append("ttv4me_" + lora_name.split('.')[0])  # Create unique adapter name
+                 adapter_weights.append(lora_weights[i])

+                 # Check if the LoRA is already loaded; if not, load it
+                 if not hasattr(pipe, "ttv4me_" + lora_name.split('.')[0]):
+                     pipe.load_lora_weights(
+                         "Sergidev/TTV4ME",  # Private repository
+                         weight_name=lora_name,
+                         adapter_name="ttv4me_" + lora_name.split('.')[0],
+                         token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
                      )
+
+         pipe.set_adapters(adapter_names, adapter_weights)
+
+         with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
+             output = pipe(
+                 prompt=prompt,
+                 image=init_image,
+                 height=height,
+                 width=width,
+                 num_frames=num_frames,
+                 num_inference_steps=num_inference_steps,
+                 generator=generator,
+             ).frames[0]
+
+         output_path = "output.mp4"
+         export_to_video(output, output_path, fps=fps)
+         torch.cuda.empty_cache()
+         gc.collect()
+         return output_path
+
+
+ def apply_preset(preset_name, *current_values):
+     if preset_name == "Higher Resolution":
+         return [608, 448, 24, 29, 12]
+     elif preset_name == "More Frames":
+         return [512, 320, 42, 27, 14]
+     return current_values
+
+
+ css = """
+ #col-container {
+     margin: 0 auto;
+     max-width: 850px;
+ }
+
+ .dark-theme {
+     background-color: #1f1f1f;
+     color: #ffffff;
+ }
+
+ .container {
+     margin: 0 auto;
+     padding: 20px;
+     border-radius: 10px;
+     background-color: #2d2d2d;
+     box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+ }
+
+ .title {
+     text-align: center;
+     margin-bottom: 1em;
+     color: #ffffff;
+ }
+
+ .description {
+     text-align: center;
+     margin-bottom: 2em;
+     color: #cccccc;
+     font-size: 0.95em;
+     line-height: 1.5;
+ }
+
+ .prompt-container {
+     background-color: #363636;
+     padding: 15px;
+     border-radius: 8px;
+     margin-bottom: 1em;
+     width: 100%;
+ }
+
+ .prompt-textbox {
+     min-height: 80px !important;
+ }
+
+ .preset-buttons {
+     display: flex;
+     gap: 10px;
+     justify-content: center;
+     margin-bottom: 1em;
+ }
+
+ .support-text {
+     text-align: center;
+     margin-top: 1em;
+     color: #cccccc;
+     font-size: 0.9em;
+ }
+
+ a {
+     color: #00a7e1;
+     text-decoration: none;
+ }
+
+ a:hover {
+     text-decoration: underline;
+ }
+ """
+
+ with gr.Blocks(css=css, theme="dark") as demo:
+     with gr.Column(elem_id="col-container"):
+         gr.Markdown("# 🎬 Hunyuan Studio", elem_classes=["title"])
+         gr.Markdown(
+             """Image-to-video and text-to-video, with multiple LoRAs to use.
+             This space uses the 'hunyuan flat color v2' LoRA by Motimalu to generate better 2D animated sequences. The prompt only handles 77 tokens.
+             If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
+             elem_classes=["description"]
+         )
+
+         with gr.Column(elem_classes=["prompt-container"]):
+             prompt = gr.Textbox(
+                 label="Prompt",
+                 placeholder="Enter your prompt here (include the terms 'flat color, no lineart, blending' for 2D illustration)",
+                 show_label=False,
+                 elem_classes=["prompt-textbox"],
+                 lines=3
+             )
+         with gr.Column(elem_classes=["prompt-container"]):
+             image_input = gr.Image(label="Upload Image (Optional)", image_types=["png", "jpg", "jpeg"])
+
+         with gr.Row():
+             run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
+         with gr.Row(elem_classes=["preset-buttons"]):
+             preset_high_res = gr.Button("📺 Higher Resolution Preset")
+             preset_more_frames = gr.Button("🎞️ More Frames Preset")
+         with gr.Row():
+             result = gr.Video(label="Generated Video")
+
+         with gr.Accordion("⚙️ Advanced Settings", open=False):
+             seed = gr.Slider(
+                 label="Seed (-1 for random)",
+                 minimum=-1,
+                 maximum=MAX_SEED,
+                 step=1,
+                 value=-1,
+             )
+
+             with gr.Row():
+                 height = gr.Slider(
+                     label="Height",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=16,
+                     value=608,
+                 )
+
+                 width = gr.Slider(
+                     label="Width",
+                     minimum=256,
+                     maximum=MAX_IMAGE_SIZE,
+                     step=16,
+                     value=448,
+                 )
+
+             with gr.Row():
+                 num_frames = gr.Slider(
+                     label="Number of frames to generate",
+                     minimum=1.0,
+                     maximum=257.0,
+                     step=1,
+                     value=24,
+                 )
+
+                 num_inference_steps = gr.Slider(
+                     label="Number of inference steps",
+                     minimum=1,
+                     maximum=50,
+                     step=1,
+                     value=29,
+                 )
+
+             fps = gr.Slider(
+                 label="Frames per second",
+                 minimum=1,
+                 maximum=60,
+                 step=1,
+                 value=12,
+             )
+
+             # LoRA Selection
+             lora_names = gr.CheckboxGroup(
+                 choices=list(TTV4ME_Loras.keys()),
+                 label="Select TTV4ME LoRAs"
+             )
+
+             lora_weights = []
+             for i in range(len(TTV4ME_Loras)):
+                 lora_weights.append(gr.Slider(
+                     label=f"Weight for LoRA {i + 1}",
+                     minimum=0.0,
+                     maximum=1.0,
+                     step=0.05,
+                     value=0.5,
+                     visible=False  # Initially hidden
+                 ))
+
+             def update_lora_visibility(selected_loras):
+                 visibility = [lora in selected_loras for lora in TTV4ME_Loras.keys()]
+                 return visibility
+
+             lora_names.change(
+                 update_lora_visibility,
+                 inputs=[lora_names],
+                 outputs=lora_weights
+             )
+
+         # Event handling
+         input_components = [prompt, image_input, height, width, num_frames, num_inference_steps, seed, fps, lora_names]
+         input_components.extend(lora_weights)
+
+         run_button.click(
+             fn=generate,
+             inputs=input_components,
+             outputs=[result],
+         )
+
+         # Preset button handlers
+         preset_high_res.click(
+             fn=lambda: apply_preset("Higher Resolution"),
+             outputs=[height, width, num_frames, num_inference_steps, fps]
+         )
+
+         preset_more_frames.click(
+             fn=lambda: apply_preset("More Frames"),
+             outputs=[height, width, num_frames, num_inference_steps, fps]
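
A note on the checkbox-to-slider wiring in this commit: `update_lora_visibility` returns plain booleans, which Gradio treats as new values for the output Sliders rather than as visibility flags, and `generate()` opens the upload with `PIL.Image.open`, which expects a file path. Below is a minimal sketch of the usual pattern, assuming a recent Gradio release and using `gr.update(visible=...)`; the component names and the reduced LoRA list are illustrative only and are not part of the committed file.

# Sketch only (not part of commit 0250a5e): hedged alternative wiring.
import gradio as gr

LORA_FILES = ["Top_Off.safetensors", "stripe_v2.safetensors"]  # illustrative subset

with gr.Blocks() as sketch:
    # type="filepath" hands the callback a path that PIL.Image.open can read.
    image_input = gr.Image(label="Upload Image (Optional)", type="filepath")
    lora_names = gr.CheckboxGroup(choices=LORA_FILES, label="Select TTV4ME LoRAs")
    lora_weights = [
        gr.Slider(0.0, 1.0, 0.5, step=0.05, label=f"Weight for {name}", visible=False)
        for name in LORA_FILES
    ]

    def update_lora_visibility(selected):
        # Return one gr.update per slider so the checkboxes toggle visibility
        # instead of being written into the sliders as new values.
        return [gr.update(visible=(name in selected)) for name in LORA_FILES]

    lora_names.change(update_lora_visibility, inputs=[lora_names], outputs=lora_weights)

The CheckboxGroup passes the list of selected filenames, and each slider receives its own `gr.update`, matching the one-output-per-slider wiring used by `lora_names.change` in the commit.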