Update app.py
app.py CHANGED
@@ -67,7 +67,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 request_log = []
 
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(device)
-clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
+clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 
 
 def compute_clip_embedding(text=None, image=None):
@@ -223,7 +223,7 @@ pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
-).to(device)
+).to(torch.bfloat16).to(device)
 
 @spaces.GPU(duration=80)
 def generate_video_from_text(
@@ -235,9 +235,9 @@ def generate_video_from_text(
     seed=random.randint(0, MAX_SEED),
     num_inference_steps=30,
     guidance_scale=4.2,
-    height=
+    height=768,
     width=768,
-    num_frames=
+    num_frames=60,
     progress=gr.Progress(),
 ):
     if len(prompt.strip()) < 50:
@@ -339,7 +339,6 @@ def generate_video_from_image(
     original_resolution = f"{img.width}x{img.height}"  # Format as "widthxheight"
     clip_embedding = compute_clip_embedding(image=img)
 
-
     media_items = load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
 
     prompt = enhance_prompt_if_enabled(prompt, enhance_prompt_toggle, type="i2v")
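For context on the `).to(torch.bfloat16).to(device)` change: this uses the standard `torch.nn.Module.to` dtype overload, which casts every floating-point parameter and buffer before the module is moved to the target device, roughly halving weight memory compared with float32. A minimal sketch of the pattern on a stand-in module (the `nn.Linear` below is illustrative only, not the actual XoraVideoPipeline):

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stand-in for the pipeline object; the diff applies the same chained calls
# to the XoraVideoPipeline instance.
model = nn.Linear(16, 16)

# Same pattern as the diff: cast the weights to bfloat16, then move to the device.
model = model.to(torch.bfloat16).to(device)

print(model.weight.dtype)   # torch.bfloat16
print(model.weight.device)  # cuda:0 when a GPU is available, otherwise cpu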
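The first hunk's context lines load the CLIP model and processor that feed `compute_clip_embedding`. A self-contained sketch of how such a helper is typically written against the `transformers` API (the function body here is an assumption for illustration, not the app's actual implementation; `cache_dir=model_path` is omitted):

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def compute_clip_embedding(image: Image.Image) -> torch.Tensor:
    # Preprocess the image and project it into CLIP's shared embedding space.
    inputs = clip_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        return clip_model.get_image_features(**inputs)

embedding = compute_clip_embedding(Image.new("RGB", (224, 224)))
print(embedding.shape)  # torch.Size([1, 512]) for the base-patch32 checkpoint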