import gradio as gr
import torch

from spectro import wav_bytes_from_spectrogram_image
from diffusers import StableDiffusionPipeline

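# Load the Riffusion checkpoint in half precision and move it to the GPU.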
model_id = "riffusion/riffusion-model-v1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")

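# Generate a spectrogram image from the prompt, then decode it to a WAV file.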
def predict(prompt):
    spec = pipe(prompt).images[0]
    wav = wav_bytes_from_spectrogram_image(spec)
    with open("output.wav", "wb") as f:
        f.write(wav[0].getbuffer())
    return spec, "output.wav"

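# Wire everything into a Gradio web UI: text prompt in, spectrogram image and audio out.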
gr.Interface(
    predict,
    inputs="text",
    outputs=[gr.Image(), gr.Audio(type="filepath")],
    title="Riffusion Text-to-Music",
    description="""<p style="text-align: center;">Describe a musical prompt and generate music: Riffusion produces a spectrogram image and its corresponding sound.
              <br />If you want to skip the queue or get faster inference, you can duplicate this Space:
              <a href="https://huggingface.co/spaces/fffiloni/spectrogram-to-music?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
              </p>
"""
).queue(max_size=32, concurrency_count=20).launch(debug=True)