Maximofn committed on
Commit
4cfb1f8
·
1 Parent(s): f984ec1

Enhance CogVideoX Gradio interface with device selection, default parameters, and example prompts

Browse files
Files changed (1) hide show
  1. app.py +62 -20
app.py CHANGED
@@ -20,13 +20,15 @@ def generate_video(transformer_model, prompt, negative_prompt, num_frames, heigh
20
  transformer_model,
21
  torch_dtype=torch.bfloat16
22
  )
 
 
23
 
24
  # Inicializar el pipeline
25
  pipeline = DiffusionPipeline.from_pretrained(
26
  "THUDM/CogVideoX-5b",
27
  transformer=transformer,
28
  torch_dtype=torch.bfloat16
29
- ).to("cuda")
30
 
31
  # Generar el video
32
  video_frames = pipeline(
@@ -45,7 +47,7 @@ def generate_video(transformer_model, prompt, negative_prompt, num_frames, heigh
45
 
46
  # Crear la interfaz de Gradio
47
  with gr.Blocks() as demo:
48
- gr.Markdown("# Generador de Videos con CogVideoX")
49
 
50
  with gr.Row():
51
  with gr.Column():
@@ -53,59 +55,99 @@ with gr.Blocks() as demo:
53
  model_dropdown = gr.Dropdown(
54
  choices=TRANSFORMER_MODELS,
55
  value=TRANSFORMER_MODELS[0],
56
- label="Modelo Transformer"
57
  )
58
  prompt_input = gr.Textbox(
59
  lines=5,
60
  label="Prompt",
61
- placeholder="Describe el video que quieres generar..."
62
  )
63
  negative_prompt_input = gr.Textbox(
64
  lines=2,
65
- label="Prompt Negativo",
66
  value="inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs"
67
  )
68
 
69
- with gr.Accordion("Parámetros Avanzados", open=False):
70
  num_frames = gr.Slider(
71
  minimum=8,
72
  maximum=128,
73
- value=8,
74
  step=1,
75
- label="Número de Frames",
76
- info="Cantidad de frames en el video"
77
  )
78
  height = gr.Slider(
79
  minimum=32,
80
  maximum=1024,
81
- value=32,
82
  step=64,
83
- label="Altura",
84
- info="Altura del video en píxeles"
85
  )
86
  width = gr.Slider(
87
  minimum=32,
88
  maximum=1024,
89
- value=32,
90
  step=64,
91
- label="Anchura",
92
- info="Anchura del video en píxeles"
93
  )
94
  num_inference_steps = gr.Slider(
95
  minimum=10,
96
  maximum=100,
97
- value=10,
98
  step=1,
99
- label="Pasos de Inferencia",
100
- info="Mayor número = mejor calidad pero más lento"
101
  )
102
 
103
- generate_btn = gr.Button("Generar Video")
104
 
105
  with gr.Column():
106
  # Salida
107
- video_output = gr.Video(label="Video Generado")
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  # Conectar la función
110
  generate_btn.click(
111
  fn=generate_video,
 
20
  transformer_model,
21
  torch_dtype=torch.bfloat16
22
  )
23
+
24
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
 
26
  # Inicializar el pipeline
27
  pipeline = DiffusionPipeline.from_pretrained(
28
  "THUDM/CogVideoX-5b",
29
  transformer=transformer,
30
  torch_dtype=torch.bfloat16
31
+ ).to(device)
32
 
33
  # Generar el video
34
  video_frames = pipeline(
 
47
 
48
  # Crear la interfaz de Gradio
49
  with gr.Blocks() as demo:
50
+ gr.Markdown("# CogVideoX Video Generator")
51
 
52
  with gr.Row():
53
  with gr.Column():
 
55
  model_dropdown = gr.Dropdown(
56
  choices=TRANSFORMER_MODELS,
57
  value=TRANSFORMER_MODELS[0],
58
+ label="Transformer Model"
59
  )
60
  prompt_input = gr.Textbox(
61
  lines=5,
62
  label="Prompt",
63
+ placeholder="Describe the video you want to generate..."
64
  )
65
  negative_prompt_input = gr.Textbox(
66
  lines=2,
67
+ label="Negative Prompt",
68
  value="inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs"
69
  )
70
 
71
+ with gr.Accordion("Advanced Parameters", open=False):
72
  num_frames = gr.Slider(
73
  minimum=8,
74
  maximum=128,
75
+ value=50,
76
  step=1,
77
+ label="Number of Frames",
78
+ info="Number of frames in the video"
79
  )
80
  height = gr.Slider(
81
  minimum=32,
82
  maximum=1024,
83
+ value=256,
84
  step=64,
85
+ label="Height",
86
+ info="Video height in pixels"
87
  )
88
  width = gr.Slider(
89
  minimum=32,
90
  maximum=1024,
91
+ value=256,
92
  step=64,
93
+ label="Width",
94
+ info="Video width in pixels"
95
  )
96
  num_inference_steps = gr.Slider(
97
  minimum=10,
98
  maximum=100,
99
+ value=50,
100
  step=1,
101
+ label="Inference Steps",
102
+ info="Higher number = better quality but slower"
103
  )
104
 
105
+ generate_btn = gr.Button("Generate Video")
106
 
107
  with gr.Column():
108
  # Salida
109
+ video_output = gr.Video(label="Generated Video")
110
 
111
+ # Agregar ejemplos
112
+ gr.Examples(
113
+ examples=[
114
+ [
115
+ "sayakpaul/pika-dissolve-v0",
116
+ "PIKA_DISSOLVE A slender glass vase, brimming with tiny white pebbles, stands centered on a polished ebony dais. Without warning, the glass begins to dissolve from the edges inward. Wisps of translucent dust swirl upward in an elegant spiral, illuminating each pebble as they drop onto the dais. The gently drifting dust eventually settles, leaving only the scattered stones and faint traces of shimmering powder on the stage.",
117
+ "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
118
+ 8, 32, 32, 10
119
+ ],
120
+ [
121
+ "finetrainers/crush-smol-v0",
122
+ "DIFF_crush A thick burger is placed on a dining table, and a large metal cylinder descends from above, crushing the burger as if it were under a hydraulic press. The bulb is crushed, leaving a pile of debris around it.",
123
+ "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
124
+ 8, 32, 32, 10
125
+ ],
126
+ [
127
+ "finetrainers/3dgs-v0",
128
+ "3D_dissolve In a 3D appearance, a bookshelf filled with books is surrounded by a burst of red sparks, creating a dramatic and explosive effect against a black background.",
129
+ "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
130
+ 8, 32, 32, 10
131
+ ],
132
+ [
133
+ "finetrainers/cakeify-v0",
134
+ "PIKA_CAKEIFY On a gleaming glass display stand, a sleek black purse quietly commands attention. Suddenly, a knife appears and slices through the shoe, revealing a fluffy vanilla sponge at its core. Immediately, it turns into a hyper-realistic prop cake, delighting the senses with its playful juxtaposition of the everyday and the extraordinary.",
135
+ "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
136
+ 8, 32, 32, 10
137
+ ]
138
+ ],
139
+ inputs=[
140
+ model_dropdown,
141
+ prompt_input,
142
+ negative_prompt_input,
143
+ num_frames,
144
+ height,
145
+ width,
146
+ num_inference_steps
147
+ ],
148
+ label="Prompt Examples"
149
+ )
150
+
151
  # Conectar la función
152
  generate_btn.click(
153
  fn=generate_video,