File size: 8,989 Bytes
21fdbeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import gradio as gr
import re
import subprocess
import time
import select
from tqdm import tqdm
from huggingface_hub import snapshot_download

# Download the Wan 2.1 T2V checkpoint into the working directory so the
# `generate` CLI invoked by infer() can load it from --ckpt_dir.
snapshot_download(
    repo_id="Wan-AI/Wan2.1-T2V-1.3B",
    local_dir="./Wan2.1-T2V-1.3B",
)

def infer(prompt: str, progress=gr.Progress(track_tqdm=True)) -> str:
    """Generate a video from *prompt* by shelling out to the Wan 2.1 CLI.

    Spawns ``python -u -m generate`` as a subprocess, parses its combined
    stdout/stderr to drive three nested tqdm bars (overall process,
    per-INFO-step timer, denoising progress), and returns the path of the
    resulting MP4.

    Args:
        prompt: Text description of the desired video scene.
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors the
            tqdm bars created here into the Gradio UI.

    Returns:
        Path to the rendered video file ("generated_video.mp4").

    Raises:
        Exception: If the subprocess exits with a non-zero return code.
    """
    # Configuration:
    total_process_steps = 11          # Total INFO messages expected from the CLI
    irrelevant_steps = 4              # First 4 INFO messages are ignored
    relevant_steps = total_process_steps - irrelevant_steps  # 7 overall steps

    # Create overall progress bar (Level 1): one tick per relevant INFO step.
    overall_bar = tqdm(total=relevant_steps, desc="Overall Process", position=1,
                       ncols=120, dynamic_ncols=False, leave=True)
    processed_steps = 0

    # Regex for video generation progress lines, e.g. "42%|####  | 21/50" (Level 3).
    progress_pattern = re.compile(r"(\d+)%\|.*\| (\d+)/(\d+)")
    video_progress_bar = None

    # Variables for sub-step progress bar (Level 2).
    # NOTE(review): 1500 ticks at the 40 ms select() timeout below caps each
    # sub-step's timer at ~60 s (an earlier comment said 1000 ticks / 40 s).
    sub_bar = None
    sub_ticks = 0
    sub_tick_total = 1500
    video_phase = False

    command = [
        "python", "-u", "-m", "generate",  # -u: unbuffered so progress streams live
        "--task", "t2v-1.3B",
        "--size", "832*480",
        "--ckpt_dir", "./Wan2.1-T2V-1.3B",
        "--sample_shift", "8",
        "--sample_guide_scale", "6",
        "--prompt", prompt,
        "--save_file", "generated_video.mp4"
    ]

    # Merge stderr into stdout and line-buffer so log/progress lines arrive promptly.
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               text=True,
                               bufsize=1)

    while True:
        # Poll stdout with a 40ms timeout.
        rlist, _, _ = select.select([process.stdout], [], [], 0.04)
        if rlist:
            line = process.stdout.readline()
            if not line:
                break
            stripped_line = line.strip()
            if not stripped_line:
                continue

            # Check for video generation progress (Level 3)
            progress_match = progress_pattern.search(stripped_line)
            if progress_match:
                # If a sub-step bar is active, finish it before entering video phase.
                if sub_bar is not None:
                    if sub_ticks < sub_tick_total:
                        sub_bar.update(sub_tick_total - sub_ticks)
                    sub_bar.close()
                    overall_bar.update(1)
                    overall_bar.refresh()
                    sub_bar = None
                    sub_ticks = 0
                video_phase = True
                current = int(progress_match.group(2))
                total = int(progress_match.group(3))
                if video_progress_bar is None:
                    video_progress_bar = tqdm(total=total, desc="Video Generation", position=0,
                                              ncols=120, dynamic_ncols=True, leave=True)
                # Jump to the absolute step count reported by the CLI line.
                video_progress_bar.update(current - video_progress_bar.n)
                video_progress_bar.refresh()
                if video_progress_bar.n >= video_progress_bar.total:
                    video_phase = False
                    overall_bar.update(1)
                    overall_bar.refresh()
                    video_progress_bar.close()
                    video_progress_bar = None
                continue

            # Process INFO messages (Level 2 sub-step)
            if "INFO:" in stripped_line:
                parts = stripped_line.split("INFO:", 1)
                msg = parts[1].strip() if len(parts) > 1 else ""
                print(stripped_line)  # Log the message

                # For the first 4 INFO messages, simply count them.
                if processed_steps < irrelevant_steps:
                    processed_steps += 1
                    continue
                else:
                    # A new relevant INFO message has arrived.
                    # If a sub-bar exists (whether full or not), finish it now.
                    if sub_bar is not None:
                        if sub_ticks < sub_tick_total:
                            sub_bar.update(sub_tick_total - sub_ticks)
                        sub_bar.close()
                        overall_bar.update(1)
                        overall_bar.refresh()
                        sub_bar = None
                        sub_ticks = 0
                    # Start a new sub-step bar for the current INFO message.
                    sub_bar = tqdm(total=sub_tick_total, desc=msg, position=2,
                                   ncols=120, dynamic_ncols=False, leave=True)
                    sub_ticks = 0
                continue
            else:
                print(stripped_line)
        else:
            # No new data within 40ms: advance the sub-step timer bar one tick.
            if sub_bar is not None:
                if sub_ticks < sub_tick_total:
                    sub_bar.update(1)
                    sub_ticks += 1
                    sub_bar.refresh()
                # If full (timer cap reached), do not advance the overall step—just remain waiting.
        if process.poll() is not None:
            break

    # Drain any remaining output.
    for line in process.stdout:
        print(line.strip())
    process.wait()
    if video_progress_bar is not None:
        video_progress_bar.close()
    if sub_bar is not None:
        sub_bar.close()
    overall_bar.close()

    if process.returncode == 0:
        print("Command executed successfully.")
        return "generated_video.mp4"
    else:
        print("Error executing command.")
        raise Exception("Error executing command")

# Gradio UI Components
# Canned example prompts shown in the gr.Examples widget below; clicking one
# fills the prompt textbox.
PROMPT_EXAMPLES = [
    "A dramatic scene set in a burning city at night, with embers, smoke, and ash filling the air. The environment is a desolate wasteland of collapsed buildings and ruins. A lone male character with fiery, messy red and orange hair looks back with a determined expression and glowing red eyes. His clothing is torn and tattered, with long sleeves and worn leather details. The atmosphere is intense and dreamy, with warm, vivid colors and dramatic lighting. The background is highly detailed, capturing the chaos and destruction of the burning city. The scene is a masterpiece, with an aesthetic inspired by artists like Rella and Konya Karasue, and a saturated, vivid color palette.",
    "A wide shot of sprawling ruins, with fires burning intensely at night. The full moon is obscured by smoke, and wind blows debris across the scene. A lone male character stands amidst scattered playing cards, with a Joker card prominently visible. He has long red hair flowing in the wind, partially obscuring his face, and an intense gaze implied through his hidden eyes. His clothing includes a flowing, tattered crimson and black cloak, a simple dark shirt, and leather gloves. The pose is dynamic, with arms slightly outstretched as if gesturing to the chaos. The atmosphere is chaotic, uncertain, fateful, and ominous, with a sense of impending doom. The scene is highly detailed, with vibrant fire colors, dramatic lighting, and a masterpiece aesthetic, inspired by the styles of Frank Frazetta and Moebius.",
    "A scene featuring a single girl, styled by artists like WANKE, free_style, ningen_mame, and ciloranko. The character is Tokoyami Towa, portrayed as a mischievous devil with a sensitive demeanor. The scene is set in a dark theme, with glowing eyes and a silhouette holding a sword. The atmosphere is intense and mysterious, with a focus on the character's glowing eyes and the dark, shadowy environment.",
]

# Assemble the Gradio UI: prompt input + button on the left, video player on
# the right; the button triggers infer() and routes its path into the player.
with gr.Blocks(title="Wan 2.1 Video Generator", theme=gr.themes.Soft()) as demo:
    # Header Section
    gr.Markdown("# 🎥 Wan 2.1 Text-to-Video Generator")
    gr.Markdown("Transform text prompts into dynamic videos - Duplicate this Space to run without queue! 🔥")

    # Main Interface
    with gr.Row(variant="panel"):
        with gr.Column(scale=4):
            # Free-text scene description passed verbatim to the CLI --prompt.
            prompt_input = gr.Textbox(
                label="Creative Prompt",
                placeholder="Describe your video scene here...",
                lines=4,
                max_lines=6,
            )
            generate_btn = gr.Button("Generate Video", variant="primary")

        with gr.Column(scale=6):
            # Displays the MP4 path returned by infer().
            output_video = gr.Video(
                label="Generated Video",
                format="mp4",
                interactive=False,
                elem_classes="output-video"
            )

    # Event Binding
    generate_btn.click(
        fn=infer,
        inputs=prompt_input,
        outputs=output_video
    )

    gr.Examples(
        PROMPT_EXAMPLES,
        [prompt_input]
    )

# Launch configuration
if __name__ == "__main__":
    # Gather the server options in one place, then hand them to launch().
    launch_options = {
        "show_error": True,        # surface Python tracebacks in the UI
        "show_api": True,          # expose the auto-generated API docs
        "server_port": 7860,
        "server_name": "0.0.0.0",  # bind on all interfaces
        "share": True,             # also request a public share link
    }
    demo.launch(**launch_options)