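# app.py -- Gradio web demo for InspireMusic: text-to-music generation and
# music continuation, driven through inspiremusic.cli.inference.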
import os
import gradio as gr
from inspiremusic.cli.inference import InspireMusicUnified, set_env_variables


def get_args(
        task, text="", audio=None, model_name="InspireMusic-Base",
        chorus="intro", fast=False, fade_out=True, trim=False,
        output_sample_rate=48000, max_generate_audio_seconds=30.0):
    # Collect every inference option into a single dict for InspireMusic.
    args = {
        "task": task,
        "text": text,
        "audio_prompt": audio,
        "model_name": model_name,
        "chorus": chorus,
        "fast": fast,
        "fade_out": fade_out,
        "trim": trim,
        "output_sample_rate": output_sample_rate,
        "max_generate_audio_seconds": max_generate_audio_seconds,
        # Checkpoints are expected under pretrained_models/<model_name>.
        "model_dir": os.path.join("pretrained_models", model_name),
        "result_dir": "exp/inspiremusic",
        "output_fn": "output_audio",
        "format": "wav",
    }
    return args


def music_generation(args):
    # Set the environment variables InspireMusic expects, run a single
    # inference pass, and return the path of the generated audio file.
    set_env_variables()
    model = InspireMusicUnified(
        model_name=args["model_name"],
        model_dir=args["model_dir"],
        fast=args["fast"],
        fade_out_mode=args["fade_out"],
        trim=args["trim"],
        output_sample_rate=args["output_sample_rate"],
        max_generate_audio_seconds=args["max_generate_audio_seconds"],
    )
    output_path = model.inference(
        task=args["task"],
        text=args["text"],
        audio_prompt=args["audio_prompt"],
        chorus=args["chorus"],
        output_fn=args["output_fn"],
        output_format=args["format"],
        # time_start/time_end are hard-coded to a 0-30 s window here and do
        # not track the "Max Generated Audio Length" slider below.
        time_start=0.0,
        time_end=30.0,
    )
    return output_path


def run_inspiremusic(task, text, audio, model_name, chorus, fast, fade_out,
                     trim, output_sample_rate, max_generate_audio_seconds):
    # Gradio callback: bundle the UI values into an args dict and run inference.
    args = get_args(
        task=task, text=text, audio=audio,
        model_name=model_name, chorus=chorus, fast=fast,
        fade_out=fade_out, trim=trim,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)
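
# A hypothetical programmatic call, bypassing the UI (argument order matches
# run_inspiremusic's signature; the prompt text is illustrative only):
#   run_inspiremusic("text-to-music", "an upbeat jazz tune with piano", None,
#                    "InspireMusic-Base", "intro", False, True, False,
#                    48000, 30.0)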


with gr.Blocks() as demo:
    gr.Markdown("""
    # InspireMusic
    Generate music with InspireMusic: pick a task such as "Text-to-Music" or
    "Music Continuation" and adjust the options below.
    """)
    with gr.Row():
        task = gr.Radio(["text-to-music", "continuation"],
                        label="Select Task", value="text-to-music")
        model_name = gr.Dropdown(
            ["InspireMusic-1.5B-Long", "InspireMusic-1.5B",
             "InspireMusic-1.5B-24kHz", "InspireMusic-Base",
             "InspireMusic-Base-24kHz"],
            label="Model Name", value="InspireMusic-Base")

    text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)")
    # NOTE: `source="upload"` is the Gradio 3.x argument; Gradio 4+ renamed it
    # to `sources=["upload"]`.
    audio_input = gr.Audio(label="Input Audio (For Music Continuation Task)",
                           source="upload", type="filepath")
    with gr.Column():
        chorus = gr.Dropdown(["verse", "chorus", "intro", "outro"],
                             label="Chorus Mode", value="intro")
        fast = gr.Checkbox(label="Fast Inference", value=False)
        fade_out = gr.Checkbox(label="Apply Fade Out", value=True)
        trim = gr.Checkbox(label="Trim Silence", value=False)
        output_sample_rate = gr.Dropdown([24000, 48000],
                                         label="Output Sample Rate",
                                         value=48000)
        max_generate_audio_seconds = gr.Slider(
            10, 300, label="Max Generated Audio Length (Seconds)", value=30)
    music_output = gr.Audio(label="Generated Music Result", type="filepath")
    generate_button = gr.Button("Generate Music")
    generate_button.click(run_inspiremusic,
                          inputs=[task, text_input, audio_input, model_name,
                                  chorus, fast, fade_out, trim,
                                  output_sample_rate,
                                  max_generate_audio_seconds],
                          outputs=music_output)
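
# demo.launch() serves on localhost by default; the standard Gradio options
# share=True (temporary public link) or server_name="0.0.0.0" (listen on all
# interfaces) can be passed if the demo needs to be reachable externally.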
demo.launch()