import gradio as gr
import torch

from spectro import wav_bytes_from_spectrogram_image
from diffusers import StableDiffusionPipeline
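
# Load the Riffusion checkpoint (a Stable Diffusion fine-tune that generates
# spectrogram images) in half precision and move it to the GPU. This assumes a
# CUDA device is available; on a CPU-only machine you would drop
# torch_dtype=torch.float16 and use pipe.to("cpu") instead (much slower).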
model_id = "riffusion/riffusion-model-v1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")
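

# Turn a text prompt into a short audio clip: the pipeline generates a
# spectrogram image, which is decoded back into WAV bytes and written to disk.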
def predict(prompt):
    spec = pipe(prompt).images[0]
    wav = wav_bytes_from_spectrogram_image(spec)
    with open("output.wav", "wb") as f:
        f.write(wav[0].getbuffer())
    return "output.wav"
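

# Minimal Gradio UI: a text box for the prompt, an audio player for the result.
# gr.Audio(type="filepath") is the current replacement for the deprecated
# gr.outputs.Audio used in older Gradio releases.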
gr.Interface(
    predict,
    inputs="text",
    outputs=gr.Audio(type="filepath"),
    title="Riffusion",
).launch(debug=True)