File size: 2,552 Bytes
9d749c2 f2582cd eb4dbcc 7e73b22 9d749c2 7e73b22 9d749c2 21462bf 9d749c2 be5bb7c 9d749c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import argparse
import gradio as gr
from audiodiffusion import AudioDiffusion
def generate_spectrogram_audio_and_loop(audio_file, model_id):
    """Generate a spectrogram, audio and a looped variant from an input clip.

    Args:
        audio_file: Input audio, forwarded unchanged to
            ``AudioDiffusion.generate_spectrogram_and_audio_from_audio``.
        model_id: Model identifier passed to the ``AudioDiffusion``
            constructor.

    Returns:
        A 3-tuple ``(image, (sample_rate, audio), (sample_rate, loop))``.
        ``loop`` is whatever ``AudioDiffusion.loop_it`` returns, or the
        plain ``audio`` when that call returns ``None``.
    """
    # Echo both inputs to stdout so each request is visible in the logs.
    for received in (audio_file, model_id):
        print(received)

    # A new pipeline object is built on every call for the requested model.
    pipeline = AudioDiffusion(model_id=model_id)
    generated = pipeline.generate_spectrogram_and_audio_from_audio(audio_file)
    image, (sample_rate, audio) = generated

    # Fall back to the un-looped audio when no loop could be produced.
    looped = AudioDiffusion.loop_it(audio, sample_rate)
    loop = audio if looped is None else looped

    return image, (sample_rate, audio), (sample_rate, loop)
# Model identifiers offered in the "Model" dropdown; the first entry is the
# preselected value.
_MODEL_IDS = [
    "teticio/audio-diffusion-256",
    "teticio/audio-diffusion-breaks-256",
    "teticio/audio-diffusion-instrumental-hiphop-256",
    "teticio/audio-diffusion-ddim-256",
]

# Description text for the interface. Adjacent literals are joined with no
# separator, so the spacing is exactly what each piece contains.
_DESCRIPTION = (
    "Forked from https://huggingface.co/spaces/teticio/audio-diffusion "
    "Built to style transfer to audio using Huggingface diffusers."
    "Outputs a 5 second audio clip with elements from the initial audio "
    "uploaded. This takes about 2 hours without a GPU, so why not bake a "
    "cake in the meantime? (Or try the teticio/audio-diffusion-ddim-256 "
    "model which is faster.) The code for doing style transfer method was "
    "already into teticio's repo and python notebooks I just, I think "
    "hooked it up into a hugging face space. still need some more testing "
    "and such but would be cool hook up step number and then to also do "
    "inpainting and outpointing In this space and get the api working "
    "with the updated pipelines"
)

# Gradio UI: an uploaded audio file plus a model choice go in; a mel
# spectrogram image, the generated audio and its looped variant come out.
demo = gr.Interface(
    fn=generate_spectrogram_audio_and_loop,
    title="Audio Diffusion",
    description=_DESCRIPTION,
    inputs=[
        gr.Audio(source="upload", type="filepath"),
        gr.Dropdown(label="Model", choices=_MODEL_IDS, value=_MODEL_IDS[0]),
    ],
    outputs=[
        gr.Image(label="Mel spectrogram", image_mode="L"),
        gr.Audio(label="Audio"),
        gr.Audio(label="Loop"),
    ],
    allow_flagging="never",
)
def parse_args(argv=None):
    """Parse the launcher's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``port`` (``int`` or ``None``) and
        ``server`` (``str`` or ``None``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, help="port to serve the demo on")
    # The value is handed to ``server_name``, which is a host name or IP
    # address; parsing it as an int rejected inputs such as "0.0.0.0".
    parser.add_argument(
        "--server",
        type=str,
        help="host name or IP address to bind to (default: 0.0.0.0)",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_args()
    # Bind to all interfaces unless a specific host was requested.
    demo.launch(server_name=args.server or "0.0.0.0", server_port=args.port)
|