Update app.py
app.py CHANGED
@@ -67,7 +67,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 request_log = []
 
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(device)
-clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
+clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 
 
 def compute_clip_embedding(text=None, image=None):
@@ -223,7 +223,7 @@ pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
-).to(device)
+).to(torch.bfloat16).to(device)
 
 @spaces.GPU(duration=80)
 def generate_video_from_text(
@@ -235,9 +235,9 @@ def generate_video_from_text(
     seed=random.randint(0, MAX_SEED),
     num_inference_steps=30,
     guidance_scale=4.2,
-    height=
+    height=768,
     width=768,
-    num_frames=
+    num_frames=60,
     progress=gr.Progress(),
 ):
     if len(prompt.strip()) < 50:
@@ -339,7 +339,6 @@ def generate_video_from_image(
     original_resolution = f"{img.width}x{img.height}"  # Format as "widthxheight"
     clip_embedding = compute_clip_embedding(image=img)
 
-
     media_items = load_image_to_tensor_with_resize(image_path, height, width).to(device).detach()
 
     prompt = enhance_prompt_if_enabled(prompt, enhance_prompt_toggle, type="i2v")
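For context on the `).to(torch.bfloat16).to(device)` change: this uses the standard `torch.nn.Module.to` dtype overload, which casts every floating-point parameter and buffer before the module is moved to the target device, roughly halving weight memory compared with float32. A minimal sketch of the pattern on a stand-in module (the `nn.Linear` below is illustrative only, not the actual XoraVideoPipeline):

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stand-in for the pipeline object; the diff applies the same chained calls
# to the XoraVideoPipeline instance.
model = nn.Linear(16, 16)

# Same pattern as the diff: cast the weights to bfloat16, then move to the device.
model = model.to(torch.bfloat16).to(device)

print(model.weight.dtype)   # torch.bfloat16
print(model.weight.device)  # cuda:0 when a GPU is available, otherwise cpu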
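The first hunk's context lines load the CLIP model and processor that feed `compute_clip_embedding`. A self-contained sketch of how such a helper is typically written against the `transformers` API (the function body here is an assumption for illustration, not the app's actual implementation; `cache_dir=model_path` is omitted):

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def compute_clip_embedding(image: Image.Image) -> torch.Tensor:
    # Preprocess the image and project it into CLIP's shared embedding space.
    inputs = clip_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        return clip_model.get_image_features(**inputs)

embedding = compute_clip_embedding(Image.new("RGB", (224, 224)))
print(embedding.shape)  # torch.Size([1, 512]) for the base-patch32 checkpoint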