Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
THis is the main file for the gradio web demo. It uses the
|
| 3 |
set environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.
|
| 4 |
|
| 5 |
Usage:
|
|
@@ -45,31 +45,31 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 45 |
#snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
|
| 46 |
quantization = int8_weight_only
|
| 47 |
|
| 48 |
-
transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/
|
| 49 |
-
text_encoder = T5EncoderModel.from_pretrained("THUDM/
|
| 50 |
-
vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/
|
| 51 |
quantize_(transformer, quantization())
|
| 52 |
quantize_(text_encoder, quantization())
|
| 53 |
# quantize_(vae, quantization())
|
| 54 |
|
| 55 |
pipe = CogVideoXPipeline.from_pretrained(
|
| 56 |
-
"THUDM/
|
| 57 |
text_encoder=text_encoder,
|
| 58 |
transformer=transformer,
|
| 59 |
vae=vae,
|
| 60 |
torch_dtype=torch.bfloat16
|
| 61 |
-
).to(
|
| 62 |
pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
|
| 63 |
|
| 64 |
-
pipe.enable_model_cpu_offload()
|
| 65 |
pipe.vae.enable_tiling()
|
| 66 |
pipe.vae.enable_slicing()
|
| 67 |
|
| 68 |
i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
|
| 69 |
-
"THUDM/
|
| 70 |
)
|
| 71 |
-
i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/
|
| 72 |
-
i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/
|
| 73 |
|
| 74 |
quantize_(i2v_transformer, quantization())
|
| 75 |
quantize_(i2v_text_encoder, quantization())
|
|
@@ -240,7 +240,7 @@ def infer(
|
|
| 240 |
if video_input is not None:
|
| 241 |
video = load_video(video_input)[:49] # Limit to 49 frames
|
| 242 |
pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
|
| 243 |
-
"THUDM/
|
| 244 |
transformer=transformer,
|
| 245 |
vae=vae,
|
| 246 |
scheduler=pipe.scheduler,
|
|
@@ -249,7 +249,7 @@ def infer(
|
|
| 249 |
torch_dtype=torch.bfloat16,
|
| 250 |
).to(device)
|
| 251 |
|
| 252 |
-
pipe_video.enable_model_cpu_offload()
|
| 253 |
pipe_video.vae.enable_tiling()
|
| 254 |
pipe_video.vae.enable_slicing()
|
| 255 |
video_pt = pipe_video(
|
|
@@ -261,15 +261,15 @@ def infer(
|
|
| 261 |
use_dynamic_cfg=True,
|
| 262 |
output_type="pt",
|
| 263 |
guidance_scale=guidance_scale,
|
| 264 |
-
generator=torch.Generator(device=
|
| 265 |
).frames
|
| 266 |
-
pipe_video.to(
|
| 267 |
del pipe_video
|
| 268 |
gc.collect()
|
| 269 |
torch.cuda.empty_cache()
|
| 270 |
elif image_input is not None:
|
| 271 |
pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
|
| 272 |
-
"THUDM/
|
| 273 |
transformer=i2v_transformer,
|
| 274 |
vae=i2v_vae,
|
| 275 |
scheduler=pipe.scheduler,
|
|
@@ -287,9 +287,9 @@ def infer(
|
|
| 287 |
use_dynamic_cfg=True,
|
| 288 |
output_type="pt",
|
| 289 |
guidance_scale=guidance_scale,
|
| 290 |
-
generator=torch.Generator(device=
|
| 291 |
).frames
|
| 292 |
-
pipe_image.to(
|
| 293 |
del pipe_image
|
| 294 |
gc.collect()
|
| 295 |
torch.cuda.empty_cache()
|
|
@@ -303,9 +303,9 @@ def infer(
|
|
| 303 |
use_dynamic_cfg=True,
|
| 304 |
output_type="pt",
|
| 305 |
guidance_scale=guidance_scale,
|
| 306 |
-
generator=torch.Generator(device=
|
| 307 |
).frames
|
| 308 |
-
pipe.to(
|
| 309 |
gc.collect()
|
| 310 |
return (video_pt, seed)
|
| 311 |
|
|
@@ -342,17 +342,17 @@ examples_images = [["example_images/beach.png"], ["example_images/street.png"],
|
|
| 342 |
with gr.Blocks() as demo:
|
| 343 |
gr.Markdown("""
|
| 344 |
<div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
|
| 345 |
-
|
| 346 |
</div>
|
| 347 |
<div style="text-align: center;">
|
| 348 |
-
<a href="https://huggingface.co/THUDM/
|
| 349 |
-
<a href="https://huggingface.co/THUDM/
|
| 350 |
<a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
|
| 351 |
<a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
|
| 352 |
</div>
|
| 353 |
<div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
|
| 354 |
<span>If the Space is too busy, duplicate it to use privately</span>
|
| 355 |
-
<a href="https://huggingface.co/spaces/tsqn/
|
| 356 |
margin-left: .75em;
|
| 357 |
"></a>
|
| 358 |
</div>
|
|
|
|
| 1 |
"""
|
| 2 |
+
This is the main file for the Gradio web demo. It uses the CogVideoX-5B model to generate videos.
|
| 3 |
set environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.
|
| 4 |
|
| 5 |
Usage:
|
|
|
|
| 45 |
#snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
|
| 46 |
quantization = int8_weight_only
|
| 47 |
|
| 48 |
+
transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX-5B", subfolder="transformer", torch_dtype=torch.bfloat16)
|
| 49 |
+
text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX-5B", subfolder="text_encoder", torch_dtype=torch.bfloat16)
|
| 50 |
+
vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX-5B", subfolder="vae", torch_dtype=torch.bfloat16)
|
| 51 |
quantize_(transformer, quantization())
|
| 52 |
quantize_(text_encoder, quantization())
|
| 53 |
# quantize_(vae, quantization())
|
| 54 |
|
| 55 |
pipe = CogVideoXPipeline.from_pretrained(
|
| 56 |
+
"THUDM/CogVideoX-5B",
|
| 57 |
text_encoder=text_encoder,
|
| 58 |
transformer=transformer,
|
| 59 |
vae=vae,
|
| 60 |
torch_dtype=torch.bfloat16
|
| 61 |
+
).to(device)
|
| 62 |
pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
|
| 63 |
|
| 64 |
+
# pipe.enable_model_cpu_offload()
|
| 65 |
pipe.vae.enable_tiling()
|
| 66 |
pipe.vae.enable_slicing()
|
| 67 |
|
| 68 |
i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
|
| 69 |
+
"THUDM/CogVideoX-5B-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
|
| 70 |
)
|
| 71 |
+
i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX-5B-I2V", subfolder="text_encoder", torch_dtype=torch.bfloat16)
|
| 72 |
+
i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX-5B-I2V", subfolder="vae", torch_dtype=torch.bfloat16)
|
| 73 |
|
| 74 |
quantize_(i2v_transformer, quantization())
|
| 75 |
quantize_(i2v_text_encoder, quantization())
|
|
|
|
| 240 |
if video_input is not None:
|
| 241 |
video = load_video(video_input)[:49] # Limit to 49 frames
|
| 242 |
pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
|
| 243 |
+
"THUDM/CogVideoX-5B",
|
| 244 |
transformer=transformer,
|
| 245 |
vae=vae,
|
| 246 |
scheduler=pipe.scheduler,
|
|
|
|
| 249 |
torch_dtype=torch.bfloat16,
|
| 250 |
).to(device)
|
| 251 |
|
| 252 |
+
# pipe_video.enable_model_cpu_offload()
|
| 253 |
pipe_video.vae.enable_tiling()
|
| 254 |
pipe_video.vae.enable_slicing()
|
| 255 |
video_pt = pipe_video(
|
|
|
|
| 261 |
use_dynamic_cfg=True,
|
| 262 |
output_type="pt",
|
| 263 |
guidance_scale=guidance_scale,
|
| 264 |
+
generator=torch.Generator(device=device).manual_seed(seed),
|
| 265 |
).frames
|
| 266 |
+
pipe_video.to(device)
|
| 267 |
del pipe_video
|
| 268 |
gc.collect()
|
| 269 |
torch.cuda.empty_cache()
|
| 270 |
elif image_input is not None:
|
| 271 |
pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
|
| 272 |
+
"THUDM/CogVideoX-5B-I2V",
|
| 273 |
transformer=i2v_transformer,
|
| 274 |
vae=i2v_vae,
|
| 275 |
scheduler=pipe.scheduler,
|
|
|
|
| 287 |
use_dynamic_cfg=True,
|
| 288 |
output_type="pt",
|
| 289 |
guidance_scale=guidance_scale,
|
| 290 |
+
generator=torch.Generator(device=device).manual_seed(seed),
|
| 291 |
).frames
|
| 292 |
+
pipe_image.to(device)
|
| 293 |
del pipe_image
|
| 294 |
gc.collect()
|
| 295 |
torch.cuda.empty_cache()
|
|
|
|
| 303 |
use_dynamic_cfg=True,
|
| 304 |
output_type="pt",
|
| 305 |
guidance_scale=guidance_scale,
|
| 306 |
+
generator=torch.Generator(device=device).manual_seed(seed),
|
| 307 |
).frames
|
| 308 |
+
pipe.to(device)
|
| 309 |
gc.collect()
|
| 310 |
return (video_pt, seed)
|
| 311 |
|
|
|
|
| 342 |
with gr.Blocks() as demo:
|
| 343 |
gr.Markdown("""
|
| 344 |
<div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
|
| 345 |
+
CogVideoX-5B Huggingface Space🤗
|
| 346 |
</div>
|
| 347 |
<div style="text-align: center;">
|
| 348 |
+
<a href="https://huggingface.co/THUDM/CogVideoX-5B">🤗 5B(T2V) Model Hub</a> |
|
| 349 |
+
<a href="https://huggingface.co/THUDM/CogVideoX-5B-I2V">🤗 5B(I2V) Model Hub</a> |
|
| 350 |
<a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
|
| 351 |
<a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
|
| 352 |
</div>
|
| 353 |
<div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
|
| 354 |
<span>If the Space is too busy, duplicate it to use privately</span>
|
| 355 |
+
<a href="https://huggingface.co/spaces/tsqn/CogVideoX-5B-Space?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
|
| 356 |
margin-left: .75em;
|
| 357 |
"></a>
|
| 358 |
</div>
|