Spaces:

Sergidev
/

Illustration-Text-To-Video

Paused

App Files Files Community

Sergidev committed on Feb 15

Commit

d83ac8a

1 Parent(s): 0a69c30

v1

Browse files

Files changed (6) hide show

README.md +2 -2
app.py +7 -0
demo_app.py +230 -0
packages.txt +4 -0
requirements.txt +48 -0
utils.py +40 -0

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Anime TextToVideo
-emoji: 📈
 colorFrom: pink
-colorTo: yellow
 sdk: gradio
 sdk_version: 5.16.0
 app_file: app.py

 ---
 title: Anime TextToVideo
+emoji: ✨
 colorFrom: pink
+colorTo: purple
 sdk: gradio
 sdk_version: 5.16.0
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from utils import install_packages
+if __name__ == "__main__":
+    install_packages()  # pip-install runtime dependencies before demo_app is imported
+    from demo_app import demo  # deferred import: only happens after install_packages() has run
+    demo.queue(max_size=20).launch()  # enable the request queue (max_size=20), then start the app

demo_app.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import spaces
+import gc
+import gradio as gr
+import numpy as np
+import os
+from pathlib import Path
+from diffusers import GGUFQuantizationConfig, HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+from diffusers.utils import export_to_video
+from huggingface_hub import snapshot_download
+import torch
+gc.collect()
+torch.cuda.empty_cache()  # clear the CUDA cache before any model is loaded
+torch.set_grad_enabled(False)  # disable autograd globally; this module only runs inference
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+model_id = "hunyuanvideo-community/HunyuanVideo"
+base_path = f"/home/user/app/{model_id}"  # local directory the model repo is downloaded into
+os.makedirs(base_path, exist_ok=True)
+snapshot_download(repo_id=model_id, local_dir=base_path)  # runs at import time, before the UI is built
+ckp_path = Path(base_path)
+gguf_filename = "hunyuan-video-t2v-720p-Q4_0.gguf"
+transformer_path = f"https://huggingface.co/city96/HunyuanVideo-gguf/blob/main/{gguf_filename}"
+transformer = HunyuanVideoTransformer3DModel.from_single_file(  # transformer comes from the GGUF file, not from base_path
+    transformer_path,
+    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
+    torch_dtype=torch.bfloat16,
+)
+transformer = transformer.to('cuda')
+pipe = HunyuanVideoPipeline.from_pretrained(  # remaining components are loaded from the local snapshot
+    ckp_path,
+    transformer=transformer,  # reuse the GGUF-quantized transformer built above
+    torch_dtype=torch.float16  # NOTE(review): float16 here vs bfloat16 for the transformer - confirm the mix is intended
+)
+if pipe.text_encoder:
+    pipe.text_encoder = pipe.text_encoder.to('cuda')
+    pipe.text_encoder.eval()
+pipe.vae.enable_tiling()
+pipe.vae.enable_slicing()
+pipe.vae.eval()
+pipe.vae = pipe.vae.to("cuda")
+pipe = pipe.to("cuda")  # move the whole pipeline to the GPU (transformer, text encoder and VAE were already moved above)
+pipe.load_lora_weights(
+    "calcuis/hyvid",
+    weight_name="hyvid-lora-mila3d.safetensors",
+    adapter_name="hyvid_lora_adapter"
+)
+pipe.set_adapters("hyvid_lora_adapter", 1.2)  # activate the LoRA adapter loaded above with weight 1.2
+gc.collect()
+torch.cuda.empty_cache()
+MAX_SEED = np.iinfo(np.int32).max  # upper bound for random seeds and for the seed slider
+MAX_IMAGE_SIZE = 1024  # upper bound of the height and width sliders
+@spaces.GPU(duration=120)  # Adjusted duration to 120
+def generate(
+    prompt,
+    height,
+    width,
+    num_frames,
+    num_inference_steps,
+    seed_value,
+    fps,
+    progress=gr.Progress(track_tqdm=True)
+):
+    with torch.cuda.device(0):
+        if seed_value == -1:
+            seed_value = torch.randint(0, MAX_SEED, (1,)).item()
+        generator = torch.Generator('cuda').manual_seed(seed_value)
+        with torch.amp.autocast_mode.autocast('cuda', dtype=torch.bfloat16), torch.inference_mode(), torch.no_grad():
+            output = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_frames=num_frames,
+                num_inference_steps=num_inference_steps,
+                generator=generator,
+            ).frames[0]
+        output_path = "output.mp4"
+        export_to_video(output, output_path, fps=fps)  # Use user-defined fps
+        torch.cuda.empty_cache()
+    gc.collect()
+    return output_path
+# Gradio interface: custom stylesheet handed to gr.Blocks(css=...).
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 850px;
+}
+.dark-theme {
+    background-color: #1f1f1f;
+    color: #ffffff;
+}
+.container {
+    margin: 0 auto;
+    padding: 20px;
+    border-radius: 10px;
+    background-color: #2d2d2d;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.title {
+    text-align: center;
+    margin-bottom: 1em;
+    color: #ffffff;
+}
+.description {
+    text-align: center;
+    margin-bottom: 2em;
+    color: #cccccc;
+    font-size: 0.95em;
+    line-height: 1.5;
+}
+.prompt-container {
+    background-color: #363636;
+    padding: 15px;
+    border-radius: 8px;
+    margin-bottom: 1em;
+}
+.support-text {
+    text-align: center;
+    margin-top: 1em;
+    color: #cccccc;
+    font-size: 0.9em;
+}
+a {
+    color: #00a7e1;
+    text-decoration: none;
+}
+a:hover {
+    text-decoration: underline;
+}
+"""  # NOTE(review): no component in this file references .dark-theme, .container or .support-text - confirm they are needed
+with gr.Blocks(css=css, theme="dark") as demo:  # NOTE(review): "dark" may not be a built-in Gradio theme name - confirm it resolves
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("# 🎬 Anime TTV", elem_classes=["title"])
+        gr.Markdown(
+            """Transform your text descriptions into anime-style videos using state-of-the-art AI technology.
+            This space uses the HunyuanVideo model to generate high-quality animated sequences.
+            If you find this useful, please consider ❤️ hearting the space and supporting me on [Ko-Fi](https://ko-fi.com/sergidev)!""",
+            elem_classes=["description"]
+        )
+        with gr.Row(elem_classes=["prompt-container"]):
+            prompt = gr.Text(
+                label="Prompt",
+                placeholder="Enter your prompt here (e.g., 'a cute anime girl walking in a garden')",
+                show_label=False,
+            )
+            run_button = gr.Button("🎨 Generate", variant="primary")
+        with gr.Row():
+            result = gr.Video(label="Generated Video")
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
+            seed = gr.Slider(
+                label="Seed (-1 for random)",
+                minimum=-1,  # -1 is the sentinel generate() replaces with a random seed
+                maximum=MAX_SEED,
+                step=1,
+                value=-1,
+            )
+            with gr.Row():
+                height = gr.Slider(  # height is declared before width, matching generate()'s parameter order
+                    label="Height",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,  # keeps the value a multiple of 16
+                    value=512,
+                )
+                width = gr.Slider(
+                    label="Width",
+                    minimum=256,
+                    maximum=MAX_IMAGE_SIZE,
+                    step=16,
+                    value=320,
+                )
+            with gr.Row():
+                num_frames = gr.Slider(
+                    label="Number of frames to generate",
+                    minimum=1.0,
+                    maximum=257.0,
+                    step=1,
+                    value=42,  # NOTE(review): presumably the model prefers 4*k+1 frame counts - confirm 42 behaves as intended
+                )
+                num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=1,
+                    maximum=50,
+                    step=1,
+                    value=30,
+                )
+            fps = gr.Slider(
+                label="Frames per second",
+                minimum=1,
+                maximum=60,
+                step=1,
+                value=14,
+            )
+    # Event handling: clicking the button runs generate() and shows the returned file in the video player
+    run_button.click(
+        fn=generate,
+        inputs=[prompt, height, width, num_frames, num_inference_steps, seed, fps],
+        # The inputs are passed positionally, so their order must match generate()'s parameters
+        outputs=[result],
+    )
+# demo is not launched here; app.py imports it and calls demo.queue(...).launch()

packages.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+ffmpeg
+python3-imageio
+cmake
+libstdc++6

requirements.txt ADDED Viewed

	@@ -0,0 +1,48 @@

+--extra-index-url https://download.pytorch.org/whl/cu124
+bitsandbytes
+decord
+einops
+facexlib
+ftfy
+gguf
+git+https://github.com/huggingface/accelerate.git@main#egg=accelerate
+git+https://github.com/huggingface/diffusers.git@main#egg=diffusers
+git+https://github.com/huggingface/transformers.git@main#egg=transformers
+gradio
+hf_transfer
+huggingface_hub
+imageio
+imageio-ffmpeg
+insightface
+invisible_watermark
+matplotlib
+moviepy==1.0.3
+numpy<2.0
+onnxruntime
+onnxruntime-gpu
+omegaconf
+opencv-python
+opencv-python-headless
+git+https://github.com/huggingface/optimum-quanto
+packaging
+patch_conv
+Pillow==10.2.0
+psutil
+safetensors
+scipy
+scikit-learn
+scikit-image
+scikit-video
+sentencepiece
+setuptools
+spaces
+timm
+tokenizers>=0.13.3
+torch<2.6.0,>=2.4.0
+torchao
+torchaudio
+torchsde
+torchvision
+tqdm
+wheel
+git+https://github.com/huggingface/peft.git

utils.py ADDED Viewed

	@@ -0,0 +1,40 @@

+def install_packages():
+    import subprocess
+    import sys
+    import importlib
+    def _is_package_available(name) -> bool:
+        try:
+            importlib.import_module(name)
+            return True
+        except (ImportError, ModuleNotFoundError):
+            return False
+    # upgrade pip
+    subprocess.run(
+        f"{sys.executable} -m pip install --upgrade pip", shell=True, check=True
+    )
+    subprocess.run(
+        f"{sys.executable} -m pip install --upgrade ninja wheel setuptools packaging", shell=True, check=True
+    )
+    # install ninja
+    if not _is_package_available("ninja"):
+        subprocess.run(f"{sys.executable} -m pip install ninja nvidia-cudnn-cu12==9.1.0.70 nvidia-cublas-cu12==12.4.5.8 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu124", shell=True, check=True)
+    # install flash attention
+    if not _is_package_available("flash_attn"):
+        subprocess.run(
+            f"{sys.executable} -m pip install -v -U flash-attention --no-build-isolation",
+            env={"MAX_JOBS": "1"},
+            shell=True,
+            check=True
+        )
+    # install xformers
+    if not _is_package_available("xformers"):
+        subprocess.run(
+            f"{sys.executable} -m pip install -v -U xformers nvidia-cudnn-cu12==9.1.0.70 nvidia-cublas-cu12==12.4.5.8 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu124",
+            shell=True,
+            check=True
+        )