Sergidev commited on
Commit
5dfd9f8
·
1 Parent(s): 2dd154f
Files changed (4) hide show
  1. app.py +1 -2
  2. demo_app.py +122 -176
  3. requirements.txt +10 -46
  4. utils.py +18 -33
app.py CHANGED
@@ -2,6 +2,5 @@ from utils import install_packages
2
 
3
  if __name__ == "__main__":
4
  install_packages()
5
-
6
  from demo_app import demo
7
- demo.queue(max_size=20).launch()
 
2
 
3
  if __name__ == "__main__":
4
  install_packages()
 
5
  from demo_app import demo
6
+ demo.queue(max_size=15).launch()
demo_app.py CHANGED
@@ -1,272 +1,218 @@
1
  import spaces
2
- import gc
3
  import gradio as gr
4
  import numpy as np
5
  import os
 
 
6
  from pathlib import Path
7
- from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
8
- from diffusers.utils import export_to_video
9
  from huggingface_hub import snapshot_download
10
- import torch
11
 
12
  # Configuration
13
- gc.collect()
14
- torch.cuda.empty_cache()
15
- torch.set_grad_enabled(False)
16
- torch.backends.cudnn.deterministic = True
17
- torch.backends.cudnn.benchmark = False
18
-
19
- # Load base model
20
- model_id = "hunyuanvideo-community/HunyuanVideo"
21
- base_path = f"/home/user/app/{model_id}"
22
- os.makedirs(base_path, exist_ok=True)
23
- snapshot_download(repo_id=model_id, local_dir=base_path)
24
 
25
- # Load transformer
26
- ckp_path = Path(base_path)
27
- gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
28
- transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
29
- transformer = HunyuanVideoTransformer3DModel.from_single_file(
30
- transformer_path,
31
- quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
32
- torch_dtype=torch.bfloat16,
33
- ).to('cuda')
34
 
35
- # Initialize pipeline
 
36
  pipe = HunyuanVideoPipeline.from_pretrained(
37
- ckp_path,
38
- transformer=transformer,
39
  torch_dtype=torch.float16
40
  ).to("cuda")
41
 
42
- # Configure VAE
43
- pipe.vae.enable_tiling()
44
- pipe.vae.enable_slicing()
45
- pipe.vae.eval()
46
-
47
- # Load multiple LoRA adapters
48
- pipe.load_lora_weights(
49
- "Sergidev/TTV4ME", # Private repository
50
- weight_name="stripe_v2.safetensors",
51
- adapter_name="hunyuanvideo-lora",
52
- token=os.environ.get("HF_TOKEN") # Access token from Space secrets
53
- )
54
-
55
- pipe.load_lora_weights(
56
- "Sergidev/TTV4ME", # Private repository
57
- weight_name="Top_Off.safetensors",
58
- token=os.environ.get("HF_TOKEN") # Access token from Space secrets
59
- )
60
-
61
- pipe.load_lora_weights(
62
- "sergidev/IllustrationTTV",
63
- weight_name="hunyuan_flat_color_v2.safetensors",
64
- adapter_name="hyvid_lora_adapter"
65
- )
66
-
67
- # Set combined adapter weights
68
- pipe.set_adapters(["hunyuanvideo-lora", "hyvid_lora_adapter"], [0.9, 0.8])
69
-
70
- # Memory cleanup
71
- gc.collect()
72
- torch.cuda.empty_cache()
73
-
74
- # Remaining code unchanged...
75
- MAX_SEED = np.iinfo(np.int32).max
76
- MAX_IMAGE_SIZE = 1024
77
 
78
  @spaces.GPU(duration=300)
79
  def generate(
80
  prompt,
 
81
  height,
82
  width,
83
  num_frames,
84
  num_inference_steps,
85
  seed_value,
86
  fps,
 
 
87
  progress=gr.Progress(track_tqdm=True)
88
  ):
89
- with torch.cuda.device(0):
90
- if seed_value == -1:
91
- seed_value = torch.randint(0, MAX_SEED, (1,)).item()
92
- generator = torch.Generator('cuda').manual_seed(seed_value)
93
-
94
- with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
95
- output = pipe(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  prompt=prompt,
97
  height=height,
98
  width=width,
99
  num_frames=num_frames,
100
  num_inference_steps=num_inference_steps,
101
  generator=generator,
102
- ).frames[0]
103
 
104
- output_path = "output.mp4"
105
- export_to_video(output, output_path, fps=fps)
106
  torch.cuda.empty_cache()
107
- gc.collect()
108
- return output_path
109
 
110
- def apply_preset(preset_name, *current_values):
111
  if preset_name == "Higher Resolution":
112
  return [608, 448, 24, 29, 12]
113
  elif preset_name == "More Frames":
114
  return [512, 320, 42, 27, 14]
115
- return current_values
116
 
117
  css = """
118
- #col-container {
119
- margin: 0 auto;
120
- max-width: 850px;
121
- }
122
-
123
- .dark-theme {
124
- background-color: #1f1f1f;
125
- color: #ffffff;
126
- }
127
-
128
- .container {
129
- margin: 0 auto;
130
- padding: 20px;
131
- border-radius: 10px;
132
- background-color: #2d2d2d;
133
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
134
- }
135
-
136
- .title {
137
- text-align: center;
138
- margin-bottom: 1em;
139
- color: #ffffff;
140
- }
141
-
142
- .description {
143
- text-align: center;
144
- margin-bottom: 2em;
145
- color: #cccccc;
146
- font-size: 0.95em;
147
- line-height: 1.5;
148
- }
149
-
150
- .prompt-container {
151
- background-color: #363636;
152
- padding: 15px;
153
- border-radius: 8px;
154
- margin-bottom: 1em;
155
- width: 100%;
156
- }
157
-
158
- .prompt-textbox {
159
- min-height: 80px !important;
160
- }
161
-
162
- .preset-buttons {
163
- display: flex;
164
- gap: 10px;
165
- justify-content: center;
166
- margin-bottom: 1em;
167
- }
168
-
169
- .support-text {
170
- text-align: center;
171
- margin-top: 1em;
172
- color: #cccccc;
173
- font-size: 0.9em;
174
- }
175
-
176
- a {
177
- color: #00a7e1;
178
- text-decoration: none;
179
- }
180
-
181
- a:hover {
182
- text-decoration: underline;
183
- }
184
  """
185
 
186
  with gr.Blocks(css=css, theme="dark") as demo:
187
  with gr.Column(elem_id="col-container"):
188
- gr.Markdown("# 🎬 Anime TTV", elem_classes=["title"])
189
  gr.Markdown(
190
- """Duplicate of Illustration TTV but for Anime. May be unpredictable. THIS IS A PRO VERSION: you may need an account. as the generation duration is 300.
191
- This space uses the 'hunyuan flat color v2' LORA by Motimalu to generate better 2d animated sequences. Prompt only handles 77 tokens.
192
-
193
- If you find this useful, please consider giving the space a ❤️ and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
194
  elem_classes=["description"]
195
  )
196
 
197
  with gr.Column(elem_classes=["prompt-container"]):
198
  prompt = gr.Textbox(
199
  label="Prompt",
200
- placeholder="Enter your prompt here (Include the terms 'flat color, no lineart, blending' for 2d illustration)",
201
- show_label=False,
202
  elem_classes=["prompt-textbox"],
203
  lines=3
204
  )
 
 
 
 
 
205
 
206
  with gr.Row():
207
- run_button = gr.Button("🎨 Generate", variant="primary", size="lg")
208
 
209
  with gr.Row(elem_classes=["preset-buttons"]):
210
- preset_high_res = gr.Button("📺 Higher Resolution Preset")
211
- preset_more_frames = gr.Button("🎞️ More Frames Preset")
212
 
213
  with gr.Row():
214
  result = gr.Video(label="Generated Video")
215
 
216
  with gr.Accordion("⚙️ Advanced Settings", open=False):
217
- seed = gr.Slider(
218
- label="Seed (-1 for random)",
219
- minimum=-1,
220
- maximum=MAX_SEED,
221
- step=1,
222
- value=-1,
223
- )
 
 
224
  with gr.Row():
225
  height = gr.Slider(
226
  label="Height",
227
  minimum=256,
228
  maximum=MAX_IMAGE_SIZE,
229
  step=16,
230
- value=608,
231
  )
232
  width = gr.Slider(
233
  label="Width",
234
  minimum=256,
235
  maximum=MAX_IMAGE_SIZE,
236
  step=16,
237
- value=448,
238
  )
 
239
  with gr.Row():
240
  num_frames = gr.Slider(
241
- label="Number of frames to generate",
242
- minimum=1.0,
243
- maximum=257.0,
244
  step=1,
245
  value=24,
246
  )
247
  num_inference_steps = gr.Slider(
248
- label="Number of inference steps",
249
  minimum=1,
250
  maximum=50,
251
  step=1,
252
- value=29,
253
  )
254
- fps = gr.Slider(
255
- label="Frames per second",
256
- minimum=1,
257
- maximum=60,
258
- step=1,
259
- value=12,
260
- )
 
 
 
 
 
 
 
 
 
 
261
 
262
  # Event handling
263
  run_button.click(
264
  fn=generate,
265
- inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
266
- outputs=[result],
 
267
  )
268
 
269
- # Preset button handlers
270
  preset_high_res.click(
271
  fn=lambda: apply_preset("Higher Resolution"),
272
  outputs=[height, width, num_frames, num_inference_steps, fps]
 
1
  import spaces
 
2
  import gradio as gr
3
  import numpy as np
4
  import os
5
+ import torch
6
+ from PIL import Image
7
  from pathlib import Path
8
+ from diffusers import HunyuanVideoPipeline
 
9
  from huggingface_hub import snapshot_download
 
10
 
11
  # Configuration
12
+ LORA_CHOICES = [
13
+ "Top_Off.safetensors",
14
+ "huanyan_helper.safetensors",
15
+ "huanyan_helper_alpha.safetensors",
16
+ "hunyuan-t-solo-v1.0.safetensors",
17
+ "stripe_v2.safetensors"
18
+ ]
 
 
 
 
19
 
20
+ MAX_SEED = np.iinfo(np.int32).max
21
+ MAX_IMAGE_SIZE = 1024
 
 
 
 
 
 
 
22
 
23
+ # Initialize pipeline with ZeroGPU optimizations
24
+ model_id = "Tencent-Hunyuan/Hunyuan-Video-Lite"
25
  pipe = HunyuanVideoPipeline.from_pretrained(
26
+ model_id,
 
27
  torch_dtype=torch.float16
28
  ).to("cuda")
29
 
30
+ # Load all available LoRAs
31
+ for lora_file in LORA_CHOICES:
32
+ try:
33
+ pipe.load_lora_weights(
34
+ "Sergidev/TTV4ME",
35
+ weight_name=lora_file,
36
+ adapter_name=lora_file.split('.')[0],
37
+ token=os.environ.get("HF_TOKEN")
38
+ )
39
+ except Exception as e:
40
+ print(f"Error loading {lora_file}: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  @spaces.GPU(duration=300)
43
  def generate(
44
  prompt,
45
+ image_input,
46
  height,
47
  width,
48
  num_frames,
49
  num_inference_steps,
50
  seed_value,
51
  fps,
52
+ selected_loras,
53
+ lora_weights,
54
  progress=gr.Progress(track_tqdm=True)
55
  ):
56
+ # Image validation
57
+ if image_input is not None:
58
+ img = Image.open(image_input)
59
+ if img.size != (width, height):
60
+ raise gr.Error(f"Image resolution {img.size} must match video resolution {width}x{height}")
61
+ prompt = f"Image prompt: {prompt}" if prompt else "Based on uploaded image"
62
+
63
+ # Set active LoRAs
64
+ active_adapters = []
65
+ adapter_weights = []
66
+ for idx, selected in enumerate(selected_loras):
67
+ if selected:
68
+ active_adapters.append(LORA_CHOICES[idx].split('.')[0])
69
+ adapter_weights.append(lora_weights[idx])
70
+
71
+ if active_adapters:
72
+ pipe.set_adapters(active_adapters, adapter_weights)
73
+
74
+ # Generation logic
75
+ torch.cuda.empty_cache()
76
+ if seed_value == -1:
77
+ seed_value = torch.randint(0, MAX_SEED, (1,)).item()
78
+
79
+ generator = torch.Generator('cuda').manual_seed(seed_value)
80
+
81
+ try:
82
+ if image_input:
83
+ output = pipe.image_to_video(
84
+ Image.open(image_input).convert("RGB"),
85
+ prompt=prompt,
86
+ height=height,
87
+ width=width,
88
+ num_frames=num_frames,
89
+ num_inference_steps=num_inference_steps,
90
+ generator=generator,
91
+ )
92
+ else:
93
+ output = pipe.text_to_video(
94
  prompt=prompt,
95
  height=height,
96
  width=width,
97
  num_frames=num_frames,
98
  num_inference_steps=num_inference_steps,
99
  generator=generator,
100
+ )
101
 
102
+ return output.video
103
+ finally:
104
  torch.cuda.empty_cache()
 
 
105
 
106
+ def apply_preset(preset_name):
107
  if preset_name == "Higher Resolution":
108
  return [608, 448, 24, 29, 12]
109
  elif preset_name == "More Frames":
110
  return [512, 320, 42, 27, 14]
111
+ return [512, 512, 24, 25, 12]
112
 
113
  css = """
114
+ /* Existing CSS remains unchanged */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  """
116
 
117
  with gr.Blocks(css=css, theme="dark") as demo:
118
  with gr.Column(elem_id="col-container"):
119
+ gr.Markdown("# 🎬 Hunyuan Studio", elem_classes=["title"])
120
  gr.Markdown(
121
+ """Text-to-Video & Image-to-Video generation with multiple LoRA adapters.<br>
122
+ Ensure image resolution matches selected video dimensions.""",
 
 
123
  elem_classes=["description"]
124
  )
125
 
126
  with gr.Column(elem_classes=["prompt-container"]):
127
  prompt = gr.Textbox(
128
  label="Prompt",
129
+ placeholder="Enter text prompt or describe the image...",
 
130
  elem_classes=["prompt-textbox"],
131
  lines=3
132
  )
133
+ image_input = gr.Image(
134
+ label="Upload Reference Image (Optional)",
135
+ type="filepath",
136
+ visible=True
137
+ )
138
 
139
  with gr.Row():
140
+ run_button = gr.Button("🎬 Generate Video", variant="primary", size="lg")
141
 
142
  with gr.Row(elem_classes=["preset-buttons"]):
143
+ preset_high_res = gr.Button("📺 Resolution Preset")
144
+ preset_more_frames = gr.Button("🎞️ Frames Preset")
145
 
146
  with gr.Row():
147
  result = gr.Video(label="Generated Video")
148
 
149
  with gr.Accordion("⚙️ Advanced Settings", open=False):
150
+ with gr.Row():
151
+ seed = gr.Slider(
152
+ label="Seed (-1 for random)",
153
+ minimum=-1,
154
+ maximum=MAX_SEED,
155
+ step=1,
156
+ value=-1,
157
+ )
158
+
159
  with gr.Row():
160
  height = gr.Slider(
161
  label="Height",
162
  minimum=256,
163
  maximum=MAX_IMAGE_SIZE,
164
  step=16,
165
+ value=512,
166
  )
167
  width = gr.Slider(
168
  label="Width",
169
  minimum=256,
170
  maximum=MAX_IMAGE_SIZE,
171
  step=16,
172
+ value=512,
173
  )
174
+
175
  with gr.Row():
176
  num_frames = gr.Slider(
177
+ label="Frame Count",
178
+ minimum=1,
179
+ maximum=257,
180
  step=1,
181
  value=24,
182
  )
183
  num_inference_steps = gr.Slider(
184
+ label="Inference Steps",
185
  minimum=1,
186
  maximum=50,
187
  step=1,
188
+ value=25,
189
  )
190
+ fps = gr.Slider(
191
+ label="FPS",
192
+ minimum=1,
193
+ maximum=60,
194
+ step=1,
195
+ value=12,
196
+ )
197
+
198
+ with gr.Accordion("🧩 LoRA Configuration", open=False):
199
+ lora_checkboxes = []
200
+ lora_sliders = []
201
+ for lora in LORA_CHOICES:
202
+ with gr.Row():
203
+ cb = gr.Checkbox(label=f"Enable {lora}", value=False)
204
+ sl = gr.Slider(0.0, 1.0, value=0.8, label=f"{lora} Weight")
205
+ lora_checkboxes.append(cb)
206
+ lora_sliders.append(sl)
207
 
208
  # Event handling
209
  run_button.click(
210
  fn=generate,
211
+ inputs=[prompt, image_input, height, width, num_frames,
212
+ num_inference_steps, seed, fps, lora_checkboxes, lora_sliders],
213
+ outputs=result
214
  )
215
 
 
216
  preset_high_res.click(
217
  fn=lambda: apply_preset("Higher Resolution"),
218
  outputs=[height, width, num_frames, num_inference_steps, fps]
requirements.txt CHANGED
@@ -1,48 +1,12 @@
1
  --extra-index-url https://download.pytorch.org/whl/cu124
2
- bitsandbytes
3
- decord
4
- einops
5
- facexlib
6
- ftfy
7
- gguf
8
- git+https://github.com/huggingface/accelerate.git@main#egg=accelerate
9
- git+https://github.com/huggingface/diffusers.git@main#egg=diffusers
10
- git+https://github.com/huggingface/transformers.git@main#egg=transformers
11
- gradio
12
- hf_transfer
13
- huggingface_hub
14
- imageio
15
- imageio-ffmpeg
16
- insightface
17
- invisible_watermark
18
- matplotlib
19
- moviepy==1.0.3
20
  numpy<2.0
21
- onnxruntime
22
- onnxruntime-gpu
23
- omegaconf
24
- opencv-python
25
- opencv-python-headless
26
- git+https://github.com/huggingface/optimum-quanto
27
- packaging
28
- patch_conv
29
- Pillow==10.2.0
30
- psutil
31
- safetensors
32
- scipy
33
- scikit-learn
34
- scikit-image
35
- scikit-video
36
- sentencepiece
37
- setuptools
38
- spaces
39
- timm
40
- tokenizers>=0.13.3
41
- torch<2.6.0,>=2.4.0
42
- torchao
43
- torchaudio
44
- torchsde
45
- torchvision
46
- tqdm
47
- wheel
48
- git+https://github.com/huggingface/peft.git
 
1
  --extra-index-url https://download.pytorch.org/whl/cu124
2
+ diffusers==0.29.0
3
+ transformers==4.41.0
4
+ gradio>=4.0.0
5
+ torch>=2.4.0,<2.6.0
6
+ safetensors>=0.4.2
7
+ huggingface_hub>=0.23.0
8
+ imageio>=2.34.0
9
+ opencv-python-headless>=4.9.0
10
+ Pillow>=10.2.0
 
 
 
 
 
 
 
 
 
11
  numpy<2.0
12
+ accelerate>=0.30.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py CHANGED
@@ -3,38 +3,23 @@ def install_packages():
3
  import sys
4
  import importlib
5
 
6
- def _is_package_available(name) -> bool:
7
- try:
8
- importlib.import_module(name)
9
- return True
10
- except (ImportError, ModuleNotFoundError):
11
- return False
 
 
 
 
 
12
 
13
- # upgrade pip
14
- subprocess.run(
15
- f"{sys.executable} -m pip install --upgrade pip", shell=True, check=True
16
- )
17
- subprocess.run(
18
- f"{sys.executable} -m pip install --upgrade ninja wheel setuptools packaging", shell=True, check=True
19
- )
20
 
21
- # install ninja
22
- if not _is_package_available("ninja"):
23
- subprocess.run(f"{sys.executable} -m pip install ninja nvidia-cudnn-cu12==9.1.0.70 nvidia-cublas-cu12==12.4.5.8 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu124", shell=True, check=True)
24
-
25
- # install flash attention
26
- if not _is_package_available("flash_attn"):
27
- subprocess.run(
28
- f"{sys.executable} -m pip install -v -U flash-attention --no-build-isolation",
29
- env={"MAX_JOBS": "1"},
30
- shell=True,
31
- check=True
32
- )
33
-
34
- # install xformers
35
- if not _is_package_available("xformers"):
36
- subprocess.run(
37
- f"{sys.executable} -m pip install -v -U xformers nvidia-cudnn-cu12==9.1.0.70 nvidia-cublas-cu12==12.4.5.8 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu124",
38
- shell=True,
39
- check=True
40
- )
 
3
  import sys
4
  import importlib
5
 
6
+ required = [
7
+ 'torch>=2.4.0,<2.6.0',
8
+ 'diffusers',
9
+ 'transformers',
10
+ 'gradio',
11
+ 'safetensors',
12
+ 'huggingface_hub',
13
+ 'imageio',
14
+ 'opencv-python-headless',
15
+ 'Pillow'
16
+ ]
17
 
18
+ subprocess.run([
19
+ sys.executable, "-m", "pip", "install",
20
+ "--upgrade", "pip", "setuptools", "wheel"
21
+ ], check=True)
 
 
 
22
 
23
+ subprocess.run([
24
+ sys.executable, "-m", "pip", "install"
25
+ ] + required, check=True)