rahul7star committed
Commit 961eee6 · verified · 1 Parent(s): 0b72293

Create app_4k.py

Files changed (1)
  app_4k.py  +300 -0
app_4k.py ADDED
@@ -0,0 +1,300 @@
import torch
from diffusers import AutoencoderKLWan, WanPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
import gradio as gr
import tempfile
import spaces
from huggingface_hub import hf_hub_download, snapshot_download
import numpy as np
import random
import os

# Fetch the UltraWan high-resolution weights up front; they are only used when
# the optional 4K toggle is enabled in the UI below.
snapshot_download(
    repo_id="APRIL-AIGC/UltraWan",
    repo_type="model",
    local_dir="ultrawan_weights/UltraWan",
    resume_download=True,
)

# Lightweight 1.3B alternative:
# MODEL_ID = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
# LORA_REPO_ID = "Kijai/WanVideo_comfy"
# LORA_FILENAME = "Wan21_CausVid_bidirect2_T2V_1_3B_lora_rank32.safetensors"

MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"

LORA_REPO_ID = "Kijai/WanVideo_comfy"
LORA_FILENAME = "Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank256_bf16.safetensors"

# Alternative LoRAs:
# LORA_FILENAME = "Pusa/Wan21_PusaV1_LoRA_14B_rank512_bf16.safetensors"
# LORA_REPO_ID = "RaphaelLiu/PusaV1"
# LORA_FILENAME = "pusa_v1.safetensors"
# LORA_REPO_ID = "Kijai/WanVideo_comfy"
# LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"

# Load the base Wan 2.1 14B text-to-video pipeline (fp32 VAE, bf16 transformer)
# and fuse the distillation LoRA so few-step sampling works well.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
pipe.to("cuda")

causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
pipe.fuse_lora()

# Previous (lower-resolution) limits, kept for reference:
# MOD_VALUE = 32
# DEFAULT_H_SLIDER_VALUE = 512
# DEFAULT_W_SLIDER_VALUE = 896
#
# # Environment variable check
# IS_ORIGINAL_SPACE = os.environ.get("IS_ORIGINAL_SPACE", "True") == "True"
#
# # Original limits
# ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H = 128, 1280
# ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W = 128, 1280
# ORIGINAL_MAX_DURATION = round(81/24, 1)  # MAX_FRAMES_MODEL / FIXED_FPS
#
# # Limited space constants
# LIMITED_MAX_RESOLUTION = 640
# LIMITED_MAX_DURATION = 2.0
# LIMITED_MAX_STEPS = 4
#
# # Set limits based on environment variable
# if IS_ORIGINAL_SPACE:
#     SLIDER_MIN_H, SLIDER_MAX_H = 128, LIMITED_MAX_RESOLUTION
#     SLIDER_MIN_W, SLIDER_MAX_W = 128, LIMITED_MAX_RESOLUTION
#     MAX_DURATION = LIMITED_MAX_DURATION
#     MAX_STEPS = LIMITED_MAX_STEPS
# else:
#     SLIDER_MIN_H, SLIDER_MAX_H = ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H
#     SLIDER_MIN_W, SLIDER_MAX_W = ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W
#     MAX_DURATION = ORIGINAL_MAX_DURATION
#     MAX_STEPS = 8
#
# MAX_SEED = np.iinfo(np.int32).max
#
# FIXED_FPS = 24
# FIXED_OUTPUT_FPS = 18  # we downspeed the output video as a temporary "trick"
# MIN_FRAMES_MODEL = 8
# MAX_FRAMES_MODEL = 81

# New defaults and limits for higher-resolution generation
MOD_VALUE = 32

DEFAULT_H_SLIDER_VALUE = 768
DEFAULT_W_SLIDER_VALUE = 1344  # roughly 16:9 and divisible by MOD_VALUE

# "Original Space" = the free public Hugging Face Space, which runs with compute limits
IS_ORIGINAL_SPACE = os.environ.get("IS_ORIGINAL_SPACE", "True") == "True"

# Conservative limits for low-end environments
LIMITED_MAX_RESOLUTION = 640
LIMITED_MAX_DURATION = 2.0
LIMITED_MAX_STEPS = 4

# Generous limits for local runs or Pro spaces
ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H = 128, 1536
ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W = 128, 1536
ORIGINAL_MAX_DURATION = round(81 / 24, 1)  # 3.4 seconds
ORIGINAL_MAX_STEPS = 8

# Choose the limited or the original (generous) settings
if IS_ORIGINAL_SPACE:
    SLIDER_MIN_H, SLIDER_MAX_H = 128, LIMITED_MAX_RESOLUTION
    SLIDER_MIN_W, SLIDER_MAX_W = 128, LIMITED_MAX_RESOLUTION
    MAX_DURATION = LIMITED_MAX_DURATION
    MAX_STEPS = LIMITED_MAX_STEPS
else:
    SLIDER_MIN_H, SLIDER_MAX_H = ORIGINAL_SLIDER_MIN_H, ORIGINAL_SLIDER_MAX_H
    SLIDER_MIN_W, SLIDER_MAX_W = ORIGINAL_SLIDER_MIN_W, ORIGINAL_SLIDER_MAX_W
    MAX_DURATION = ORIGINAL_MAX_DURATION
    MAX_STEPS = ORIGINAL_MAX_STEPS

MAX_SEED = np.iinfo(np.int32).max

FIXED_FPS = 24          # frames generated per second of requested duration
FIXED_OUTPUT_FPS = 18   # reduce final video FPS to save space
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81

default_prompt_t2v = "cinematic footage, group of pedestrians dancing in the streets of NYC, high quality breakdance, 4K, tiktok video, intricate details, instagram feel, dynamic camera, smooth dance motion, dimly lit, stylish, beautiful faces, smiling, music video"
default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"


def get_duration(prompt, height, width,
                 negative_prompt, duration_seconds,
                 guidance_scale, steps,
                 seed, randomize_seed,
                 progress):
    # Estimate the GPU allocation (in seconds) a request will need; passed to
    # @spaces.GPU as a dynamic duration.
    if steps > 4 and duration_seconds > 2:
        return 90
    elif steps > 4 or duration_seconds > 2:
        return 75
    else:
        return 60

@spaces.GPU(duration=get_duration)
def generate_video(prompt, height, width,
                   negative_prompt=default_negative_prompt, duration_seconds=2,
                   guidance_scale=1, steps=4,
                   seed=42, randomize_seed=False,
                   use_ultrawan_4k=False,  # ✅ New toggle argument
                   progress=gr.Progress(track_tqdm=True)):

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a text prompt. Try to use long and precise descriptions.")

    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    # Decide whether to use UltraWan or the regular model
    if use_ultrawan_4k:
        # ✅ Override with 4K resolution
        # (Note: 2160 is not a multiple of MOD_VALUE; snap it down if the
        # pipeline rejects the size.)
        target_h, target_w = 2160, 3840
        steps = max(steps, 10)
        guidance_scale = max(guidance_scale, 7.5)

        # ✅ Lazy-load the UltraWan model on first use.
        # NOTE: load_model_from_path is a placeholder that is not defined in
        # this file; an UltraWan-compatible loader for
        # "ultrawan_weights/UltraWan" still has to be plugged in here.
        global ultrawan_pipe
        if "ultrawan_pipe" not in globals() or ultrawan_pipe is None:
            ultrawan_pipe = load_model_from_path("ultrawan_weights/UltraWan")

        generator_pipe = ultrawan_pipe
    else:
        # Clamp values in demo mode
        if IS_ORIGINAL_SPACE:
            height = min(height, LIMITED_MAX_RESOLUTION)
            width = min(width, LIMITED_MAX_RESOLUTION)
            duration_seconds = min(duration_seconds, LIMITED_MAX_DURATION)
            steps = min(steps, LIMITED_MAX_STEPS)

        # Snap height/width to valid multiples of MOD_VALUE
        target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
        target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

        generator_pipe = pipe  # use the fused Wan 2.1 pipeline loaded above

    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)

    # Run inference
    with torch.inference_mode():
        output_frames_list = generator_pipe(
            prompt=prompt, negative_prompt=negative_prompt,
            height=target_h, width=target_w, num_frames=num_frames,
            guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
            generator=torch.Generator(device="cuda").manual_seed(current_seed)
        ).frames[0]

    # Save video
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_OUTPUT_FPS)

    return video_path, current_seed

with gr.Blocks(css="body { max-width: 100vw; overflow-x: hidden; }") as demo:
    gr.HTML('<meta name="viewport" content="width=device-width, initial-scale=1">')
    gr.Markdown("# ⚡ InstaVideo")
    gr.Markdown("This Gradio space is a fork of [wan2-1-fast from multimodalart](https://huggingface.co/spaces/multimodalart/wan2-1-fast), and is powered by the Wan CausVid LoRA [from Kijai](https://huggingface.co/Kijai/WanVideo_comfy/blob/main/Wan21_CausVid_bidirect2_T2V_1_3B_lora_rank32.safetensors).")

    # Add a notice for limited spaces
    if IS_ORIGINAL_SPACE:
        gr.Markdown("⚠️ **This free public demo limits the resolution to 640px, duration to 2s, and inference steps to 4. For full capabilities please duplicate this space.**")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_t2v, placeholder="Describe the video you want to generate...")

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
                enable_4k_checkbox = gr.Checkbox(label="🎥 Generate 4K Video", value=False)
                with gr.Row():
                    height_input = gr.Slider(
                        minimum=SLIDER_MIN_H,
                        maximum=SLIDER_MAX_H,
                        step=MOD_VALUE,
                        value=min(DEFAULT_H_SLIDER_VALUE, SLIDER_MAX_H),
                        label=f"Output Height (multiple of {MOD_VALUE})"
                    )
                    width_input = gr.Slider(
                        minimum=SLIDER_MIN_W,
                        maximum=SLIDER_MAX_W,
                        step=MOD_VALUE,
                        value=min(DEFAULT_W_SLIDER_VALUE, SLIDER_MAX_W),
                        label=f"Output Width (multiple of {MOD_VALUE})"
                    )
                duration_seconds_input = gr.Slider(
                    minimum=round(MIN_FRAMES_MODEL / FIXED_FPS, 1),
                    maximum=MAX_DURATION,
                    step=0.1,
                    value=2,
                    label="Duration (seconds)",
                    info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps."
                )
                steps_slider = gr.Slider(minimum=1, maximum=MAX_STEPS, step=1, value=4, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=False)

            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column():
            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)

    ui_inputs = [
        prompt_input, height_input, width_input,
        negative_prompt_input, duration_seconds_input,
        guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox,
        enable_4k_checkbox
    ]
    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])

    # Adjust examples based on space limits
    example_configs = [
        ["a majestic eagle soaring through mountain peaks, cinematic aerial view", 896, 512],
        ["a serene ocean wave crashing on a sandy beach at sunset", 448, 832],
        ["a field of flowers swaying in the wind, spring morning light", 512, 896],
    ]

    if IS_ORIGINAL_SPACE:
        # Limit example resolutions for limited spaces
        example_configs = [
            [example[0], min(example[1], LIMITED_MAX_RESOLUTION), min(example[2], LIMITED_MAX_RESOLUTION)]
            for example in example_configs
        ]

    gr.Examples(
        examples=example_configs,
        inputs=[prompt_input, height_input, width_input],
        outputs=[video_output, seed_input],
        fn=generate_video,
        cache_examples="lazy"
    )

if __name__ == "__main__":
    demo.queue().launch()
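
For reference, a minimal sketch of driving generate_video directly from Python instead of through the Gradio UI. This is not part of the committed file; it assumes the module above is saved as app_4k.py, a CUDA GPU with enough memory is available, the model and LoRA downloads at import time succeed, and the values stay within the limited-space bounds (the prompt and size are taken from the app's own examples).

# Hypothetical usage sketch, not part of the committed file.
# Importing app_4k triggers the weight downloads and loads the pipeline to CUDA.
import app_4k

video_path, used_seed = app_4k.generate_video(
    prompt="a serene ocean wave crashing on a sandy beach at sunset",
    height=448,
    width=832,
    duration_seconds=2,
    steps=4,
    randomize_seed=True,
    use_ultrawan_4k=False,  # keep the standard Wan 2.1 path
)
print(video_path, used_seed)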