Update app.py
app.py CHANGED
```diff
@@ -1,5 +1,5 @@
 """
-THis is the main file for the gradio web demo. It uses the
+THis is the main file for the gradio web demo. It uses the CogVideoX-5B model to generate videos gradio web demo.
 set environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.
 
 Usage:
```
```diff
@@ -45,31 +45,31 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 #snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
 quantization = int8_weight_only
 
-transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/
-text_encoder = T5EncoderModel.from_pretrained("THUDM/
-vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/
+transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX-5B", subfolder="transformer", torch_dtype=torch.bfloat16)
+text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX-5B", subfolder="text_encoder", torch_dtype=torch.bfloat16)
+vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX-5B", subfolder="vae", torch_dtype=torch.bfloat16)
 quantize_(transformer, quantization())
 quantize_(text_encoder, quantization())
 # quantize_(vae, quantization())
 
 pipe = CogVideoXPipeline.from_pretrained(
-    "THUDM/
+    "THUDM/CogVideoX-5B",
     text_encoder=text_encoder,
     transformer=transformer,
     vae=vae,
     torch_dtype=torch.bfloat16
-).to(
+).to(device)
 pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 
-pipe.enable_model_cpu_offload()
+# pipe.enable_model_cpu_offload()
 pipe.vae.enable_tiling()
 pipe.vae.enable_slicing()
 
 i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
-    "THUDM/
+    "THUDM/CogVideoX-5B-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
 )
-i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/
-i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/
+i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX-5B-I2V", subfolder="text_encoder", torch_dtype=torch.bfloat16)
+i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX-5B-I2V", subfolder="vae", torch_dtype=torch.bfloat16)
 
 quantize_(i2v_transformer, quantization())
 quantize_(i2v_text_encoder, quantization())
```
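The `quantize_(…, quantization())` calls above use torchao's weight-only int8 quantization to shrink the transformer and text encoder before they are wired into the pipelines. A minimal sketch of that pattern, assuming `torchao` and `diffusers` are installed; the repo id and subfolder are taken from the diff itself:

```python
import torch
from diffusers import CogVideoXTransformer3DModel
from torchao.quantization import quantize_, int8_weight_only

# Load the transformer in bfloat16, exactly as the diff does.
transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5B", subfolder="transformer", torch_dtype=torch.bfloat16
)

# int8_weight_only() converts only the weights to int8; activations stay in
# bfloat16, so quality loss is small while weight memory roughly halves.
quantize_(transformer, int8_weight_only())  # mutates the module in place
```

Because `quantize_` works in place, every pipeline later assembled from `transformer` sees the int8 weights.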
```diff
@@ -240,7 +240,7 @@ def infer(
     if video_input is not None:
         video = load_video(video_input)[:49]  # Limit to 49 frames
         pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
-            "THUDM/
+            "THUDM/CogVideoX-5B",
             transformer=transformer,
             vae=vae,
             scheduler=pipe.scheduler,
```
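The `from_pretrained` call opened in this hunk (it closes in the next) passes the already-loaded `transformer` and `vae` in as keyword arguments, so diffusers reuses the quantized modules instead of loading fresh copies for the video-to-video pipeline. A self-contained sketch of that component-sharing pattern, with names mirroring the app's globals:

```python
import torch
from diffusers import (
    AutoencoderKLCogVideoX,
    CogVideoXTransformer3DModel,
    CogVideoXVideoToVideoPipeline,
)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load (and, in the app, quantize) the heavy submodules once...
transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5B", subfolder="transformer", torch_dtype=torch.bfloat16
)
vae = AutoencoderKLCogVideoX.from_pretrained(
    "THUDM/CogVideoX-5B", subfolder="vae", torch_dtype=torch.bfloat16
)

# ...then hand them to the pipeline; diffusers fills in the remaining
# components (tokenizer, text encoder, scheduler) from the same repo.
pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX-5B",
    transformer=transformer,
    vae=vae,
    torch_dtype=torch.bfloat16,
).to(device)
```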
```diff
@@ -249,7 +249,7 @@ def infer(
             torch_dtype=torch.bfloat16,
         ).to(device)
 
-        pipe_video.enable_model_cpu_offload()
+        # pipe_video.enable_model_cpu_offload()
         pipe_video.vae.enable_tiling()
         pipe_video.vae.enable_slicing()
         video_pt = pipe_video(
```
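Here, as in the model setup earlier, the commit comments out `enable_model_cpu_offload()` in favor of the eager `.to(device)` just above it. These are alternative placement strategies and should not be combined on one pipeline; a sketch of the trade-off, with the model id as in the diff:

```python
import torch
from diffusers import CogVideoXPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-5B", torch_dtype=torch.bfloat16
)

# Option A (what the commit switches to): keep every submodule resident on
# the accelerator. Fastest per step, but peak VRAM must fit the whole model.
pipe.to(device)

# Option B (what the commit disables): park submodules in CPU RAM and move
# each one to the GPU only for its forward pass. Lower peak VRAM, slower.
# pipe.enable_model_cpu_offload()
```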
```diff
@@ -261,15 +261,15 @@ def infer(
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
-            generator=torch.Generator(device=
+            generator=torch.Generator(device=device).manual_seed(seed),
         ).frames
-        pipe_video.to(
+        pipe_video.to(device)
         del pipe_video
         gc.collect()
         torch.cuda.empty_cache()
     elif image_input is not None:
         pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
-            "THUDM/
+            "THUDM/CogVideoX-5B-I2V",
             transformer=i2v_transformer,
             vae=i2v_vae,
             scheduler=pipe.scheduler,
```
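The `generator=` change, repeated in every branch, pins sampling to the seed that `infer` returns to the caller. An unseeded `torch.Generator` starts from nondeterministic state, so the reported seed would not reproduce the video. A small sketch; `seed = 42` is a placeholder for the UI-supplied value:

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
seed = 42  # placeholder; the app threads the user-chosen seed through infer()

# A generator pinned to the sampling device and seeded explicitly makes the
# diffusion noise, and therefore the output video, reproducible.
generator = torch.Generator(device=device).manual_seed(seed)

a = torch.randn(3, generator=generator, device=device)
b = torch.randn(3, generator=torch.Generator(device=device).manual_seed(seed), device=device)
assert torch.equal(a, b)  # same seed, same draws
```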
```diff
@@ -287,9 +287,9 @@ def infer(
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
-            generator=torch.Generator(device=
+            generator=torch.Generator(device=device).manual_seed(seed),
         ).frames
-        pipe_image.to(
+        pipe_image.to(device)
         del pipe_image
         gc.collect()
         torch.cuda.empty_cache()
```
```diff
@@ -303,9 +303,9 @@ def infer(
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
-            generator=torch.Generator(device=
+            generator=torch.Generator(device=device).manual_seed(seed),
         ).frames
-        pipe.to(
+        pipe.to(device)
         gc.collect()
     return (video_pt, seed)
 
```
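All three branches finish with the same teardown once `.frames` has been captured: drop the pipeline reference, run the garbage collector, and release the CUDA caching allocator's unused blocks. A stand-alone sketch with a stand-in module; note that `empty_cache()` frees only memory that nothing references any more:

```python
import gc
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stand-in for a large GPU-resident pipeline object.
model = torch.nn.Linear(2048, 2048).to(device)

del model                     # drop the last Python reference
gc.collect()                  # collect the module's object graph promptly
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # hand cached, now-unused blocks back to CUDA
```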
```diff
@@ -342,17 +342,17 @@ examples_images = [["example_images/beach.png"], ["example_images/street.png"],
 with gr.Blocks() as demo:
     gr.Markdown("""
         <div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
-
+            CogVideoX-5B Huggingface Space🤗
         </div>
         <div style="text-align: center;">
-            <a href="https://huggingface.co/THUDM/
-            <a href="https://huggingface.co/THUDM/
+            <a href="https://huggingface.co/THUDM/CogVideoX-5B">🤗 5B(T2V) Model Hub</a> |
+            <a href="https://huggingface.co/THUDM/CogVideoX-5B-I2V">🤗 5B(I2V) Model Hub</a> |
             <a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
             <a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
         </div>
         <div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
             <span>If the Space is too busy, duplicate it to use privately</span>
-            <a href="https://huggingface.co/spaces/tsqn/
+            <a href="https://huggingface.co/spaces/tsqn/CogVideoX-5B-Space?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
             margin-left: .75em;
         "></a>
         </div>
```