import gradio as gr
import torch
from spectro import wav_bytes_from_spectrogram_image
from diffusers import StableDiffusionPipeline
model_id = "riffusion/riffusion-model-v1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
def predict(prompt):
spec = pipe(prompt).images[0]
print(spec)
wav = wav_bytes_from_spectrogram_image(spec)
with open("output.wav", "wb") as f:
f.write(wav[0].getbuffer())
return spec, 'output.wav'
gr.Interface(
predict,
inputs="text",
outputs=[gr.Image(), gr.Audio(type='filepath')],
title="Riffusion Text-to-Music",
description="""<p style="text-align: center;">Describe a musical prompt, generate music by getting a Riffusion spectrogram and its corresponding sound.
<br />if you want to skip the queue, or get faster inference, you can duplicate this space:
<a href="https://huggingface.co/spaces/fffiloni/spectrogram-to-music?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""
).queue(max_size=32, concurrency_count=20).launch(debug=True)