Sergidev committed
Commit 2dd154f · 1 Parent(s): bd113ad
Files changed (1):
  1. demo_app.py +264 -96
demo_app.py CHANGED
@@ -1,110 +1,278 @@
 import gradio as gr
-from PIL import Image
 import torch
-from diffusers import HunyuanVideoPipeline
-# ... other imports ...
-
-# Add LORA configuration
-LORA_LIST = [
-    "Top_Off.safetensors",
-    "huanyan_helper.safetensors",
-    "huanyan_helper_alpha.safetensors",
-    "hunyuan-t-solo-v1.0.safetensors",
-    "stripe_v2.safetensors"
-]
-
-def create_advanced_settings():
-    with gr.Accordion("Advanced Settings", open=False):
-        # LORA Selection
-        lora_choices = gr.CheckboxGroup(
-            choices=LORA_LIST,
-            label="Select LORAs",
-            value=[LORA_LIST[0]]
-        )
-
-        lora_weights = {}
-        for lora in LORA_LIST:
-            lora_weights[lora] = gr.Slider(0.0, 1.0, value=0.8,
-                                           label=f"{lora} Weight")
-
-        # Resolution Settings
-        resolution = gr.Dropdown(
-            choices=["512x512", "768x768", "1024x1024"],
-            value="512x512",
-            label="Output Resolution"
-        )
-
-    return lora_choices, lora_weights, resolution
-
-def validate_image_resolution(image, resolution):
-    if image is None:
-        return
-    img = Image.open(image)
-    w, h = img.size
-    if f"{w}x{h}" != resolution:
-        raise gr.Error(f"Image resolution ({w}x{h}) must match output resolution ({resolution})")
-
-def generate_video(prompt, negative_prompt, lora_choices, lora_weights,
-                   resolution, image_input=None, steps=30):
-    # Validate image resolution if provided
-    if image_input:
-        validate_image_resolution(image_input, resolution)
-
-    # Load base model
-    pipe = HunyuanVideoPipeline.from_pretrained(
-        "Tencent-Hunyuan/Hunyuan-Video-Lite",
-        torch_dtype=torch.float16
-    ).to("cuda")
-
-    # Apply selected LORAs
-    for lora in lora_choices:
-        pipe.load_lora_weights(
-            f"TTV4ME/{lora}",
-            adapter_name="hunyuanvideo-lora",
-            weight_name=lora_weights[lora]
-        )
-
-    # Generate from image or text
-    if image_input:
-        image = Image.open(image_input).convert("RGB")
-        output = pipe.image_to_video(
-            image,
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            num_frames=24,
-            height=int(resolution.split("x")[1]),
-            width=int(resolution.split("x")[0]),
-            num_inference_steps=steps
-        )
-    else:
-        output = pipe.text_to_video(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=int(resolution.split("x")[1]),
-            width=int(resolution.split("x")[0]),
-            num_inference_steps=steps
         )

-    return output.video

-# Update interface
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            prompt = gr.Textbox(label="Prompt")
-            negative_prompt = gr.Textbox(label="Negative Prompt")
-            image_input = gr.Image(label="Input Image", type="filepath")
-
-            lora_choices, lora_weights, resolution = create_advanced_settings()
-
-            generate_btn = gr.Button("Generate Video")
-
-        with gr.Column():
-            output_video = gr.Video(label="Generated Video")
-
-    generate_btn.click(
-        fn=generate_video,
-        inputs=[prompt, negative_prompt, lora_choices,
-                lora_weights, resolution, image_input],
-        outputs=output_video
     )
+import spaces
+import gc
 import gradio as gr
+import numpy as np
+import os
+from pathlib import Path
+from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+from diffusers.utils import export_to_video
+from huggingface_hub import snapshot_download
 import torch

+# Configuration
+gc.collect()
+torch.cuda.empty_cache()
+torch.set_grad_enabled(False)
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False

+# Load base model
+model_id = "hunyuanvideo-community/HunyuanVideo"
+base_path = f"/home/user/app/{model_id}"
+os.makedirs(base_path, exist_ok=True)
+snapshot_download(repo_id=model_id, local_dir=base_path)

+# Load transformer
+ckp_path = Path(base_path)
+gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
+transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
+transformer = HunyuanVideoTransformer3DModel.from_single_file(
+    transformer_path,
+    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
+    torch_dtype=torch.bfloat16,
+).to('cuda')

+# Initialize pipeline
+pipe = HunyuanVideoPipeline.from_pretrained(
+    ckp_path,
+    transformer=transformer,
+    torch_dtype=torch.float16
+).to("cuda")
+
+# Configure VAE
+pipe.vae.enable_tiling()
+pipe.vae.enable_slicing()
+pipe.vae.eval()
+
+# Load multiple LoRA adapters
+pipe.load_lora_weights(
+    "Sergidev/TTV4ME",  # Private repository
+    weight_name="stripe_v2.safetensors",
+    adapter_name="hunyuanvideo-lora",
+    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+)
+
+pipe.load_lora_weights(
+    "Sergidev/TTV4ME",  # Private repository
+    weight_name="Top_Off.safetensors",
+    token=os.environ.get("HF_TOKEN")  # Access token from Space secrets
+)
+
+pipe.load_lora_weights(
+    "sergidev/IllustrationTTV",
+    weight_name="hunyuan_flat_color_v2.safetensors",
+    adapter_name="hyvid_lora_adapter"
+)
+
+# Set combined adapter weights
+pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
+
+# Memory cleanup
+gc.collect()
+torch.cuda.empty_cache()
+
+# Remaining code unchanged...
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 1024
+
+@spaces.GPU(duration=300)
+def generate(
+    prompt,
+    height,
+    width,
+    num_frames,
+    num_inference_steps,
+    seed_value,
+    fps,
+    progress=gr.Progress(track_tqdm=True)
+):
+    with torch.cuda.device(0):
+        if seed_value == -1:
+            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
+        generator = torch.Generator('cuda').manual_seed(seed_value)
+
+        with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
+            output = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_frames=num_frames,
+                num_inference_steps=num_inference_steps,
+                generator=generator,
+            ).frames[0]
+
+        output_path = "output.mp4"
+        export_to_video(output, output_path, fps=fps)
+        torch.cuda.empty_cache()
+        gc.collect()
+        return output_path
+
+def apply_preset(preset_name, *current_values):
+    if preset_name == "Higher Resolution":
+        return [608, 448, 24, 29, 12]
+    elif preset_name == "More Frames":
+        return [512, 320, 42, 27, 14]
+    return current_values
+
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 850px;
+}
+
+.dark-theme {
+    background-color: #1f1f1f;
+    color: #ffffff;
+}
+
+.container {
+    margin: 0 auto;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: #2d2d2d;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+
+.title {
+    text-align: center;
+    margin-bottom: 1em;
+    color: #ffffff;
+}
+
+.description {
+    text-align: center;
+    margin-bottom: 2em;
+    color: #cccccc;
+    font-size: 0.95em;
+    line-height: 1.5;
+}
+
+.prompt-container {
+    background-color: #363636;
+    padding: 15px;
+    border-radius: 8px;
+    margin-bottom: 1em;
+    width: 100%;
+}
+
+.prompt-textbox {
+    min-height: 80px !important;
+}
+
+.preset-buttons {
+    display: flex;
+    gap: 10px;
+    justify-content: center;
+    margin-bottom: 1em;
+}
+
+.support-text {
+    text-align: center;
+    margin-top: 1em;
+    color: #cccccc;
+    font-size: 0.9em;
+}
+
+a {
+    color: #00a7e1;
+    text-decoration: none;
+}
+
+a:hover {
+    text-decoration: underline;
+}
+"""
+
+with gr.Blocks(css=css, theme="dark") as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 🎬 Anime TTV", elem_classes=["title"])
+        gr.Markdown(
+            """Duplicate of Illustration TTV, but for anime. Results may be unpredictable. THIS IS A PRO VERSION: you may need an account, as each generation is allocated a 300-second GPU duration.
+            This space uses the 'hunyuan flat color v2' LoRA by Motimalu to generate better 2D animated sequences. The prompt only handles 77 tokens.
+
+            If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
+            elem_classes=["description"]
         )

+        with gr.Column(elem_classes=["prompt-container"]):
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
+                show_label=False,
+                elem_classes=["prompt-textbox"],
+                lines=3
+            )
+
+        with gr.Row():
+            run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
+
+        with gr.Row(elem_classes=["preset-buttons"]):
+            preset_high_res = gr.Button("📺 Higher Resolution Preset")
+            preset_more_frames = gr.Button("🎞️ More Frames Preset")
+
+        with gr.Row():
+            result = gr.Video(label="Generated Video")
+
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
+            seed = gr.Slider(
+                label="Seed (-1 for random)",
+                minimum=-1,
+                maximum=MAX_SEED,
+                step=1,
+                value=-1,
+            )
+            with gr.Row():
+                height = gr.Slider(
+                    label="Height",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=608,
+                )
+                width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=448,
+                )
+            with gr.Row():
+                num_frames = gr.Slider(
+                    label="Number of frames to generate",
+                    minimum=1.0,
+                    maximum=257.0,
+                    step=1,
+                    value=24,
+                )
+                num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=1,
+                    maximum=50,
+                    step=1,
+                    value=29,
+                )
+                fps = gr.Slider(
+                    label="Frames per second",
+                    minimum=1,
+                    maximum=60,
+                    step=1,
+                    value=12,
+                )
+
+    # Event handling
+    run_button.click(
+        fn=generate,
+        inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
+        outputs=[result],
+    )
+
+    # Preset button handlers
+    preset_high_res.click(
+        fn=lambda: apply_preset("Higher Resolution"),
+        outputs=[height, width, num_frames, num_inference_steps, fps]
+    )

+    preset_more_frames.click(
+        fn=lambda: apply_preset("More Frames"),
+        outputs=[height, width, num_frames, num_inference_steps, fps]
     )
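
For anyone adapting this commit locally, here is a minimal sketch of how the stacked-LoRA setup in the new `demo_app.py` could be sanity-checked before generating. It is not part of the commit: it assumes the `pipe` object built above, a recent `diffusers` release with the PEFT-backed LoRA loader (where `get_active_adapters()` is available), and `peft` installed; the printed adapter list and the 0.9/0.8 scales simply mirror the `set_adapters` call in the diff.

```python
# Hedged sketch, not part of the commit: inspect and re-apply the LoRA blend
# configured in demo_app.py. Assumes `pipe` is the HunyuanVideoPipeline built
# above and that diffusers' PEFT integration is installed (pip install peft).
active = pipe.get_active_adapters()
print("Active LoRA adapters:", active)
# e.g. ['hunyuanvideo-lora', 'hyvid_lora_adapter'] after the set_adapters call above.

# Same two adapters and weights as the commit; adjust the scales here when
# experimenting with how strongly each LoRA influences the output.
pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
```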