# app.py — Gradio Space demo for the Wan 2.1 (1.3B) text-to-video model.
import re
import select
import subprocess
import time

import gradio as gr
from huggingface_hub import snapshot_download
from tqdm import tqdm

# Fetch the Wan 2.1 1.3B text-to-video checkpoint into the working directory
# so the `generate` module launched by infer() finds it under --ckpt_dir.
snapshot_download(
    repo_id="Wan-AI/Wan2.1-T2V-1.3B",
    local_dir="./Wan2.1-T2V-1.3B",
)
def infer(prompt, progress=gr.Progress(track_tqdm=True)):
    """Generate a video for *prompt* by running the Wan 2.1 generator as a subprocess.

    The subprocess's merged stdout/stderr stream is parsed to drive three
    nested tqdm bars — overall process (position 1), current sub-step
    (position 2), and denoising/video generation (position 0) — which Gradio
    mirrors into the UI via ``track_tqdm=True``.

    Returns:
        str: path of the generated MP4 ("generated_video.mp4").

    Raises:
        Exception: if the subprocess exits with a non-zero return code.
    """
    # Configuration:
    total_process_steps = 11  # Total INFO messages expected
    irrelevant_steps = 4  # First 4 INFO messages are ignored
    relevant_steps = total_process_steps - irrelevant_steps  # 7 overall steps

    # Create overall progress bar (Level 1)
    overall_bar = tqdm(total=relevant_steps, desc="Overall Process", position=1,
                       ncols=120, dynamic_ncols=False, leave=True)
    processed_steps = 0

    # Regex for video generation progress (Level 3), e.g. " 42%|####  | 21/50"
    progress_pattern = re.compile(r"(\d+)%\|.*\| (\d+)/(\d+)")
    video_progress_bar = None

    # Variables for sub-step progress bar (Level 2).
    # 1500 ticks polled every 40 ms == a ~60-second visual timeout per sub-step.
    sub_bar = None
    sub_ticks = 0
    sub_tick_total = 1500
    video_phase = False

    command = [
        "python", "-u", "-m", "generate",  # -u for unbuffered output so progress lines arrive promptly
        "--task", "t2v-1.3B",
        "--size", "832*480",
        "--ckpt_dir", "./Wan2.1-T2V-1.3B",
        "--sample_shift", "8",
        "--sample_guide_scale", "6",
        "--prompt", prompt,
        "--save_file", "generated_video.mp4"
    ]

    # stderr is merged into stdout so one stream carries both the INFO log
    # lines and the tqdm-style percentage lines.
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               text=True,
                               bufsize=1)

    while True:
        # Poll stdout with a 40ms timeout so the sub-step bar can keep
        # ticking even while the subprocess produces no output.
        rlist, _, _ = select.select([process.stdout], [], [], 0.04)
        if rlist:
            line = process.stdout.readline()
            if not line:
                break
            stripped_line = line.strip()
            if not stripped_line:
                continue

            # Check for video generation progress (Level 3)
            progress_match = progress_pattern.search(stripped_line)
            if progress_match:
                # If a sub-step bar is active, finish it before entering video phase.
                if sub_bar is not None:
                    if sub_ticks < sub_tick_total:
                        sub_bar.update(sub_tick_total - sub_ticks)
                    sub_bar.close()
                    overall_bar.update(1)
                    overall_bar.refresh()
                    sub_bar = None
                    sub_ticks = 0
                video_phase = True
                current = int(progress_match.group(2))
                total = int(progress_match.group(3))
                if video_progress_bar is None:
                    video_progress_bar = tqdm(total=total, desc="Video Generation", position=0,
                                              ncols=120, dynamic_ncols=True, leave=True)
                # Jump the bar straight to the reported step count.
                video_progress_bar.update(current - video_progress_bar.n)
                video_progress_bar.refresh()
                if video_progress_bar.n >= video_progress_bar.total:
                    video_phase = False
                    overall_bar.update(1)
                    overall_bar.refresh()
                    video_progress_bar.close()
                    video_progress_bar = None
                continue

            # Process INFO messages (Level 2 sub-step)
            if "INFO:" in stripped_line:
                parts = stripped_line.split("INFO:", 1)
                msg = parts[1].strip() if len(parts) > 1 else ""
                print(stripped_line)  # Log the message
                # For the first 4 INFO messages, simply count them.
                if processed_steps < irrelevant_steps:
                    processed_steps += 1
                    continue
                else:
                    # A new relevant INFO message has arrived.
                    # If a sub-bar exists (whether full or not), finish it now.
                    if sub_bar is not None:
                        if sub_ticks < sub_tick_total:
                            sub_bar.update(sub_tick_total - sub_ticks)
                        sub_bar.close()
                        overall_bar.update(1)
                        overall_bar.refresh()
                        sub_bar = None
                        sub_ticks = 0
                    # Start a new sub-step bar for the current INFO message.
                    sub_bar = tqdm(total=sub_tick_total, desc=msg, position=2,
                                   ncols=120, dynamic_ncols=False, leave=True)
                    sub_ticks = 0
                    continue
            else:
                print(stripped_line)
        else:
            # No new data within 40ms: advance the waiting sub-step bar one tick.
            if sub_bar is not None:
                if sub_ticks < sub_tick_total:
                    sub_bar.update(1)
                    sub_ticks += 1
                    sub_bar.refresh()
                # If full (~60 seconds reached), do not advance overall step—just remain waiting.
            if process.poll() is not None:
                break

    # Drain any remaining output.
    for line in process.stdout:
        print(line.strip())
    process.wait()

    # Close any bars still open before returning.
    if video_progress_bar is not None:
        video_progress_bar.close()
    if sub_bar is not None:
        sub_bar.close()
    overall_bar.close()

    if process.returncode == 0:
        print("Command executed successfully.")
        return "generated_video.mp4"
    else:
        print("Error executing command.")
        raise Exception("Error executing command")
# Gradio UI Components
# Curated example prompts surfaced through the gr.Examples widget below;
# clicking one fills the prompt textbox.
PROMPT_EXAMPLES = [
    "A dramatic scene set in a burning city at night, with embers, smoke, and ash filling the air. The environment is a desolate wasteland of collapsed buildings and ruins. A lone male character with fiery, messy red and orange hair looks back with a determined expression and glowing red eyes. His clothing is torn and tattered, with long sleeves and worn leather details. The atmosphere is intense and dreamy, with warm, vivid colors and dramatic lighting. The background is highly detailed, capturing the chaos and destruction of the burning city. The scene is a masterpiece, with an aesthetic inspired by artists like Rella and Konya Karasue, and a saturated, vivid color palette.",
    "A wide shot of sprawling ruins, with fires burning intensely at night. The full moon is obscured by smoke, and wind blows debris across the scene. A lone male character stands amidst scattered playing cards, with a Joker card prominently visible. He has long red hair flowing in the wind, partially obscuring his face, and an intense gaze implied through his hidden eyes. His clothing includes a flowing, tattered crimson and black cloak, a simple dark shirt, and leather gloves. The pose is dynamic, with arms slightly outstretched as if gesturing to the chaos. The atmosphere is chaotic, uncertain, fateful, and ominous, with a sense of impending doom. The scene is highly detailed, with vibrant fire colors, dramatic lighting, and a masterpiece aesthetic, inspired by the styles of Frank Frazetta and Moebius.",
    "A scene featuring a single girl, styled by artists like WANKE, free_style, ningen_mame, and ciloranko. The character is Tokoyami Towa, portrayed as a mischievous devil with a sensitive demeanor. The scene is set in a dark theme, with glowing eyes and a silhouette holding a sword. The atmosphere is intense and mysterious, with a focus on the character's glowing eyes and the dark, shadowy environment.",
]
# Assemble the Gradio interface: prompt input on the left, generated video
# on the right, clickable example prompts underneath.
with gr.Blocks(title="Wan 2.1 Video Generator", theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown("# 🎥 Wan 2.1 Text-to-Video Generator")
    gr.Markdown("Transform text prompts into dynamic videos - Duplicate this Space to run without queue! 🔥")

    # Input / output panel
    with gr.Row(variant="panel"):
        with gr.Column(scale=4):
            prompt_box = gr.Textbox(
                label="Creative Prompt",
                placeholder="Describe your video scene here...",
                lines=4,
                max_lines=6,
            )
            run_button = gr.Button("Generate Video", variant="primary")
        with gr.Column(scale=6):
            video_out = gr.Video(
                label="Generated Video",
                format="mp4",
                interactive=False,
                elem_classes="output-video",
            )

    # Clicking the button feeds the prompt through infer() into the player.
    run_button.click(fn=infer, inputs=prompt_box, outputs=video_out)

    # One-click example prompts.
    gr.Examples(PROMPT_EXAMPLES, [prompt_box])
# Launch configuration
if __name__ == "__main__":
    # Bind on all interfaces (needed inside containers/Spaces), expose a
    # public share link, and surface server errors plus the API docs.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        show_api=True,
    )