tsqn committed on
Commit 96cc85c · verified · 1 Parent(s): bc1d5c6

Update app.py

Files changed (1)
  1. app.py +23 -23
app.py CHANGED
@@ -1,5 +1,5 @@
 """
-THis is the main file for the gradio web demo. It uses the CogVideoX1.5-5B model to generate videos gradio web demo.
+This is the main file for the Gradio web demo. It uses the CogVideoX-5B model to generate videos.
 set environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.
 
 Usage:
@@ -45,31 +45,31 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 #snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
 quantization = int8_weight_only
 
-transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="transformer", torch_dtype=torch.bfloat16)
-text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="text_encoder", torch_dtype=torch.bfloat16)
-vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="vae", torch_dtype=torch.bfloat16)
+transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX-5B", subfolder="transformer", torch_dtype=torch.bfloat16)
+text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX-5B", subfolder="text_encoder", torch_dtype=torch.bfloat16)
+vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX-5B", subfolder="vae", torch_dtype=torch.bfloat16)
 quantize_(transformer, quantization())
 quantize_(text_encoder, quantization())
 # quantize_(vae, quantization())
 
 pipe = CogVideoXPipeline.from_pretrained(
-    "THUDM/CogVideoX1.5-5B",
+    "THUDM/CogVideoX-5B",
     text_encoder=text_encoder,
     transformer=transformer,
     vae=vae,
     torch_dtype=torch.bfloat16
-).to("cpu")
+).to(device)
 pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
 
-pipe.enable_model_cpu_offload()
+# pipe.enable_model_cpu_offload()
 pipe.vae.enable_tiling()
 pipe.vae.enable_slicing()
 
 i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
-    "THUDM/CogVideoX1.5-5B-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
+    "THUDM/CogVideoX-5B-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
 )
-i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX1.5-5B-I2V", subfolder="text_encoder", torch_dtype=torch.bfloat16)
-i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX1.5-5B-I2V", subfolder="vae", torch_dtype=torch.bfloat16)
+i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX-5B-I2V", subfolder="text_encoder", torch_dtype=torch.bfloat16)
+i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX-5B-I2V", subfolder="vae", torch_dtype=torch.bfloat16)
 
 quantize_(i2v_transformer, quantization())
 quantize_(i2v_text_encoder, quantization())
@@ -240,7 +240,7 @@ def infer(
     if video_input is not None:
         video = load_video(video_input)[:49]  # Limit to 49 frames
         pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
-            "THUDM/CogVideoX1.5-5B-",
+            "THUDM/CogVideoX-5B",
             transformer=transformer,
             vae=vae,
             scheduler=pipe.scheduler,
@@ -249,7 +249,7 @@ def infer(
             torch_dtype=torch.bfloat16,
         ).to(device)
 
-        pipe_video.enable_model_cpu_offload()
+        # pipe_video.enable_model_cpu_offload()
         pipe_video.vae.enable_tiling()
         pipe_video.vae.enable_slicing()
         video_pt = pipe_video(
@@ -261,15 +261,15 @@ def infer(
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
-            generator=torch.Generator(device="cpu").manual_seed(seed),
+            generator=torch.Generator(device=device).manual_seed(seed),
         ).frames
-        pipe_video.to("cpu")
+        pipe_video.to(device)
         del pipe_video
         gc.collect()
         torch.cuda.empty_cache()
     elif image_input is not None:
         pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
-            "THUDM/CogVideoX1.5-5B-I2V",
+            "THUDM/CogVideoX-5B-I2V",
             transformer=i2v_transformer,
             vae=i2v_vae,
             scheduler=pipe.scheduler,
@@ -287,9 +287,9 @@ def infer(
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
-            generator=torch.Generator(device="cpu").manual_seed(seed),
+            generator=torch.Generator(device=device).manual_seed(seed),
         ).frames
-        pipe_image.to("cpu")
+        pipe_image.to(device)
         del pipe_image
         gc.collect()
         torch.cuda.empty_cache()
@@ -303,9 +303,9 @@ def infer(
             use_dynamic_cfg=True,
             output_type="pt",
             guidance_scale=guidance_scale,
-            generator=torch.Generator(device="cpu").manual_seed(seed),
+            generator=torch.Generator(device=device).manual_seed(seed),
         ).frames
-        pipe.to("cpu")
+        pipe.to(device)
         gc.collect()
     return (video_pt, seed)
 
@@ -342,17 +342,17 @@ examples_images = [["example_images/beach.png"], ["example_images/street.png"],
 with gr.Blocks() as demo:
     gr.Markdown("""
     <div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
-        CogVideoX1.5-5B Huggingface Space🤗
+        CogVideoX-5B Huggingface Space🤗
     </div>
     <div style="text-align: center;">
-        <a href="https://huggingface.co/THUDM/CogVideoX1.5-5B">🤗 5B(T2V) Model Hub</a> |
-        <a href="https://huggingface.co/THUDM/CogVideoX1.5-5B-I2V">🤗 5B(I2V) Model Hub</a> |
+        <a href="https://huggingface.co/THUDM/CogVideoX-5B">🤗 5B(T2V) Model Hub</a> |
+        <a href="https://huggingface.co/THUDM/CogVideoX-5B-I2V">🤗 5B(I2V) Model Hub</a> |
         <a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
         <a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
     </div>
     <div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
         <span>If the Space is too busy, duplicate it to use privately</span>
-        <a href="https://huggingface.co/spaces/tsqn/CogVideoX1.5-5B-Space?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
+        <a href="https://huggingface.co/spaces/tsqn/CogVideoX-5B-Space?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
             margin-left: .75em;
         "></a>
     </div>
 
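One reproducibility caveat that follows from the generator change, noted here as a general PyTorch property rather than anything this commit documents: a torch.Generator seeded on the CPU and one seeded on CUDA use different RNG algorithms (Mersenne Twister vs. Philox), so the same seed yields different noise, and seeds saved from the pre-commit version of the Space will not reproduce the same videos. A small check, assuming a CUDA device is available:

import torch

seed = 42  # any fixed seed; 42 is only for illustration

# Same seed, different devices: the sampled noise (and therefore the
# generated video) differs between CPU- and CUDA-seeded generators.
g_cpu = torch.Generator(device="cpu").manual_seed(seed)
g_cuda = torch.Generator(device="cuda").manual_seed(seed)

noise_cpu = torch.randn(4, generator=g_cpu)
noise_cuda = torch.randn(4, generator=g_cuda, device="cuda")
print(torch.allclose(noise_cpu, noise_cuda.cpu()))  # expected: False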