|
import tempfile

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline

from spectro import wav_bytes_from_spectrogram_image
|
|
|
# Riffusion: a Stable Diffusion checkpoint fine-tuned to emit audio spectrograms.
model_id = "riffusion/riffusion-model-v1"

# Load the pipeline in half precision and move it to the GPU in one step
# (fp16 halves VRAM use; CUDA is required for acceptable latency).
pipe = StableDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16
).to("cuda")
|
|
|
def predict(prompt):
    """Generate a music clip from a text *prompt*.

    Runs the Riffusion diffusion pipeline to produce a spectrogram image,
    converts it to WAV audio, and writes the audio to a uniquely named
    temporary file.

    Args:
        prompt: Text description of the desired music.

    Returns:
        Filesystem path (str) of the generated ``.wav`` file, as expected
        by ``gr.Audio(type='filepath')``.
    """
    # First (and only) image of the diffusion output is the spectrogram.
    spec = pipe(prompt).images[0]

    # Helper returns a sequence whose first element is a BytesIO-like WAV
    # buffer (presumably followed by the clip duration — confirm in spectro).
    wav_buffer = wav_bytes_from_spectrogram_image(spec)[0]

    # Write to a unique temp file instead of a shared "output.wav": the app
    # serves up to 20 concurrent requests, and a fixed path would let one
    # request overwrite another's audio before Gradio reads it back.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(wav_buffer.getbuffer())
        return f.name
|
|
|
# Build the web UI: one text box in, one audio player out.
demo = gr.Interface(
    fn=predict,
    inputs="text",
    outputs=[gr.Audio(type='filepath')],
    title="Riffusion Text-to-Music",
    description="Describe a musical prompt, generate music by getting a Riffusion spectrogram and its corresponding sound",
)

# Queue requests (up to 32 waiting, 20 handled concurrently) and start the server.
demo.queue(max_size=32, concurrency_count=20).launch(debug=True)
|
|