Seed
app.py
CHANGED
@@ -10,6 +10,8 @@ from audioldm.audio.stft import TacotronSTFT
 from audioldm.variational_autoencoder import AutoencoderKL
 from pydub import AudioSegment
 
+max_64_bit_int = 2**63 - 1
+
 # Automatic device detection
 if torch.cuda.is_available():
     device_type = "cuda"
@@ -81,11 +83,18 @@ tango.vae.to(device_type)
 tango.stft.to(device_type)
 tango.model.to(device_type)
 
+def update_seed(is_randomize_seed, seed):
+    if is_randomize_seed:
+        return random.randint(0, max_64_bit_int)
+    return seed
+
 def check(
     prompt,
     output_number,
     steps,
-    guidance
+    guidance,
+    is_randomize_seed,
+    seed
 ):
     if prompt is None or prompt == "":
         raise gr.Error("Please provide a prompt input.")
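The new update_seed helper above is a pure function: when the checkbox is ticked it draws a fresh seed, otherwise it passes the user's value through. It is wired up later so its return value is written back into the seed slider, which means the UI always displays the seed that was actually used. A minimal standalone sketch of the same pattern (the print harness is illustrative, not part of the Space):

import random

max_64_bit_int = 2**63 - 1  # largest signed 64-bit integer, same bound as the diff

def update_seed(is_randomize_seed, seed):
    # Draw a fresh seed when randomization is on; otherwise keep the given one.
    if is_randomize_seed:
        return random.randint(0, max_64_bit_int)
    return seed

print(update_seed(False, 123))  # -> 123 (seed kept)
print(update_seed(True, 123))   # -> a fresh value in [0, 2**63 - 1]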
@@ -104,9 +113,18 @@ def text2audio(
     prompt,
     output_number,
     steps,
-    guidance
+    guidance,
+    is_randomize_seed,
+    seed
 ):
     start = time.time()
+
+    if seed is None:
+        seed = random.randint(0, max_64_bit_int)
+
+    random.seed(seed)
+    torch.manual_seed(seed)
+
     output_wave = tango.generate(prompt, steps, guidance, output_number)
 
     output_wave_1 = gr.make_waveform((16000, output_wave[0]))
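Seeding both Python's random module and PyTorch, as the hunk above does before calling tango.generate, is what makes a fixed seed reproduce a generation. A self-contained sketch of that property, with plain torch.randn standing in for the diffusion sampler (an assumption for illustration; torch.manual_seed also seeds the CUDA generators in current PyTorch):

import random
import torch

def sample(seed):
    # Seed both RNGs the way text2audio now does before generating.
    random.seed(seed)
    torch.manual_seed(seed)
    return torch.randn(4)  # stand-in for the model's stochastic sampling

assert torch.equal(sample(123), sample(123))  # same seed, same draw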
@@ -162,6 +180,8 @@ with gr.Blocks() as interface:
         output_number = gr.Slider(label = "Number of generations", info = "1, 2 or 3 output files", minimum = 1, maximum = 3, value = 3, step = 1, interactive = True)
         denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 10, maximum = 200, value = 10, step = 1, interactive = True)
         guidance_scale = gr.Slider(label = "Guidance Scale", info = "lower=audio quality, higher=follow the prompt", minimum = 1, maximum = 10, value = 3, step = 0.1, interactive = True)
+        randomize_seed = gr.Checkbox(label = "\U0001F3B2 Randomize seed", value = True, info = "If checked, result is always different")
+        seed = gr.Slider(minimum = 0, maximum = max_64_bit_int, step = 1, randomize = True, label = "Seed")
 
         submit = gr.Button("🚀 Generate", variant = "primary")
 
@@ -170,11 +190,18 @@ with gr.Blocks() as interface:
         output_audio_3 = gr.Audio(label = "Generated Audio #3/3", format = "wav", type="numpy")
         information = gr.Label(label = "Information")
 
-        submit.click(fn = check, inputs = [
+        submit.click(fn = update_seed, inputs = [
+            randomize_seed,
+            seed
+        ], outputs = [
+            seed
+        ], queue = False, show_progress = False).then(fn = check, inputs = [
             input_text,
             output_number,
             denoising_steps,
-            guidance_scale
+            guidance_scale,
+            randomize_seed,
+            seed
         ], outputs = [], queue = False, show_progress = False).success(fn = update_output, inputs = [
             output_format,
             output_number
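The rewired handler leans on Gradio's event chaining: .click() returns an event whose .then() step always runs next, while .success() fires only if the previous step finished without raising gr.Error. That is why update_seed can refresh the seed slider before check validates the inputs and the generation runs. A minimal sketch of the same chain with toy functions (names here are hypothetical, not the Space's):

import gradio as gr

def bump(x):
    return x + 1

def validate(x):
    if x < 0:
        raise gr.Error("negative input")  # aborts the chain; .success() won't fire

def finish(x):
    return f"done with {x}"

with gr.Blocks() as demo:
    num = gr.Number(value = 0)
    out = gr.Textbox()
    btn = gr.Button("Run")
    # Same shape as the diff: click -> then -> success
    btn.click(fn = bump, inputs = [num], outputs = [num], queue = False
    ).then(fn = validate, inputs = [num], outputs = [], queue = False
    ).success(fn = finish, inputs = [num], outputs = [out])

demo.launch()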
@@ -187,7 +214,9 @@ with gr.Blocks() as interface:
             input_text,
             output_number,
             denoising_steps,
-            guidance_scale
+            guidance_scale,
+            randomize_seed,
+            seed
         ], outputs = [
             output_audio_1,
             output_audio_2,
@@ -209,7 +238,9 @@ with gr.Blocks() as interface:
             input_text,
             output_number,
             denoising_steps,
-            guidance_scale
+            guidance_scale,
+            randomize_seed,
+            seed
         ],
         outputs = [
             output_audio_1,
@@ -218,11 +249,11 @@ with gr.Blocks() as interface:
             information
         ],
         examples = [
-            ["A hammer is hitting a wooden surface", 3, 100, 3],
-            ["Peaceful and calming ambient music with singing bowl and other instruments.", 3, 100, 3],
-            ["A man is speaking in a small room.", 2, 100, 3],
-            ["A female is speaking followed by footstep sound", 1, 100, 3],
-            ["Wooden table tapping sound followed by water pouring sound.", 3, 200, 3],
+            ["A hammer is hitting a wooden surface", 3, 100, 3, False, 123],
+            ["Peaceful and calming ambient music with singing bowl and other instruments.", 3, 100, 3, False, 123],
+            ["A man is speaking in a small room.", 2, 100, 3, False, 123],
+            ["A female is speaking followed by footstep sound", 1, 100, 3, False, 123],
+            ["Wooden table tapping sound followed by water pouring sound.", 3, 200, 3, False, 123],
         ],
         cache_examples = "lazy",
     )