# InspireMusic demo — Hugging Face Space (Running on Zero)
import os
import sys
import gradio as gr
from inspiremusic.cli.inference import InspireMusicUnified, set_env_variables
import torchaudio
import datetime
import hashlib
import torch
from modelscope import snapshot_download
# Log GPU availability and the cuDNN build for debugging the Space's runtime
os.system('nvidia-smi')
print(torch.backends.cudnn.version())
def generate_filename():
    """Return a pseudo-unique output filename: the SHA-256 hex digest of the
    current epoch second (calls within the same second will collide)."""
    now = datetime.datetime.now()
    seconds_since_epoch = int(now.timestamp())
    # Convert the seconds to a string and hash it with SHA-256
    seconds_str = str(seconds_since_epoch)
    hash_object = hashlib.sha256(seconds_str.encode())
    return hash_object.hexdigest()
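# Usage note: the digest is a 64-character hex string; it is used below as the
# basename for generated audio (result_dir "exp/inspiremusic", format "wav")
# and for trimmed audio prompts.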
def get_args(
        task, text="", audio=None, model_name="InspireMusic-Base",
        chorus="intro", output_sample_rate=48000,
        max_generate_audio_seconds=30.0, time_start=0.0, time_end=30.0,
        trim=False):
    # 24 kHz output selects the model's fast inference mode
    fast = output_sample_rate == 24000
    # Construct the argument dict expected by InspireMusicUnified
    args = {
        "task": task,
        "text": text,
        "audio_prompt": audio,
        "model_name": model_name,
        "chorus": chorus,
        "fast": fast,
        "fade_out": True,
        "trim": trim,
        "output_sample_rate": output_sample_rate,
        "min_generate_audio_seconds": 10.0,
        "max_generate_audio_seconds": max_generate_audio_seconds,
        # music_generation() reads this key; 5 s matches cut_audio() below
        "max_audio_prompt_length": 5.0,
        "model_dir": os.path.join("pretrained_models", model_name),
        "result_dir": "exp/inspiremusic",
        "output_fn": generate_filename(),
        "format": "wav",
        "time_start": time_start,
        "time_end": time_end,
        "fade_out_duration": 1.0,
    }
if args["time_start"] is None:
args["time_start"] = 0.0
args["time_end"] = args["time_start"] + args["max_generate_audio_seconds"]
print(args)
return args
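# A minimal sketch of a call (the prompt text is illustrative; everything else
# takes the defaults above):
#
#   get_args(task="text-to-music", text="An uplifting R&B song.")
#   # -> {"task": "text-to-music", "fast": False, "output_sample_rate": 48000,
#   #     "time_start": 0.0, "time_end": 30.0, "format": "wav", ...}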
def music_generation(args):
    """Load the requested model and run a single inference pass."""
    # Note: the model is reloaded on every request; caching one instance per
    # model_name would avoid the repeated start-up cost.
    set_env_variables()
model = InspireMusicUnified(
model_name=args["model_name"],
model_dir=args["model_dir"],
min_generate_audio_seconds=args["min_generate_audio_seconds"],
max_generate_audio_seconds=args["max_generate_audio_seconds"],
sample_rate=24000,
output_sample_rate=args["output_sample_rate"],
load_jit=True,
load_onnx=False,
fast=args["fast"],
result_dir=args["result_dir"])
output_path = model.inference(
task=args["task"],
text=args["text"],
audio_prompt=args["audio_prompt"],
chorus=args["chorus"],
time_start=args["time_start"],
time_end=args["time_end"],
output_fn=args["output_fn"],
max_audio_prompt_length=args["max_audio_prompt_length"],
fade_out_duration=args["fade_out_duration"],
output_format=args["format"],
fade_out_mode=args["fade_out"],
trim=args["trim"])
return output_path
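# End-to-end sketch (assumes the weights were already downloaded to
# pretrained_models/, as done in the __main__ block below):
#
#   args = get_args(task="text-to-music",
#                   text="An uplifting R&B song.",
#                   model_name="InspireMusic-Base")
#   wav_path = music_generation(args)   # path to the generated .wav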
def update_text():
    # Apparently unused helper: none of the UI callbacks below reference it,
    # and `global` cannot reach main()'s local text_input component anyway.
    global text_input
    text_input = "New value set by button click"
    return text_input
default_prompts = [
"Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.",
"Compose an uplifting R&B song.",
"Create an emotional, introspective folk song with acoustic guitar and soft vocals."
]
def cut_audio(audio_file, cut_seconds=5):
    """Trim the prompt audio to its first `cut_seconds` seconds and save a copy."""
    audio, sr = torchaudio.load(audio_file)
    num_samples = int(cut_seconds * sr)
    trimmed_audio = audio[:, :num_samples]
    output_path = os.path.join(os.getcwd(), "audio_prompt_" + generate_filename() + ".wav")
    torchaudio.save(output_path, trimmed_audio, sr)
    return output_path
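# Example (hypothetical input file): keep the first 5 s of a local prompt.
#
#   short_prompt = cut_audio("my_prompt.wav", cut_seconds=5)
#   # -> ./audio_prompt_<digest>.wav at the original sample rate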
def run_text2music(text, model_name, chorus,
                   output_sample_rate, max_generate_audio_seconds):
    # Generate music from a text prompt alone
    args = get_args(
        task='text-to-music', text=text, audio=None,
        model_name=model_name, chorus=chorus,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)

def run_continuation(text, audio, model_name, chorus,
                     output_sample_rate, max_generate_audio_seconds):
    # Continue a user-supplied audio prompt, trimmed to its first 5 s
    args = get_args(
        task='continuation', text=text, audio=cut_audio(audio, cut_seconds=5),
        model_name=model_name, chorus=chorus,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)
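# Sketch of a direct call (hypothetical prompt path), equivalent to what the
# "Music Continuation" button below triggers:
#
#   run_continuation("Continue this melody.", "prompt.wav",
#                    "InspireMusic-Base", "intro", 48000, 30.0)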
def main():
with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # InspireMusic
        - Supports text-to-music, music continuation, audio super-resolution, and audio reconstruction with high audio quality, at output sampling rates of 24kHz and 48kHz.
        - Supports long-form audio generation in multiple output formats, i.e., wav, flac, mp3, m4a.
        - Open-source [InspireMusic-Base](https://modelscope.cn/models/iic/InspireMusic/summary), [InspireMusic-Base-24kHz](https://modelscope.cn/models/iic/InspireMusic-Base-24kHz/summary), [InspireMusic-1.5B](https://modelscope.cn/models/iic/InspireMusic-1.5B/summary), [InspireMusic-1.5B-24kHz](https://modelscope.cn/models/iic/InspireMusic-1.5B-24kHz/summary), [InspireMusic-1.5B-Long](https://modelscope.cn/models/iic/InspireMusic-1.5B-Long/summary) models for music generation.
        - Currently supports English text prompts only.
        """)
with gr.Row(equal_height=True):
model_name = gr.Dropdown(["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"], label="Select Model Name", value="InspireMusic-Base")
chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
label="Chorus Mode", value="intro")
output_sample_rate = gr.Dropdown([48000, 24000],
label="Output Audio Sample Rate (Hz)",
value=48000)
max_generate_audio_seconds = gr.Slider(10, 120,
label="Generate Audio Length (s)",
value=30)
with gr.Row(equal_height=True):
# Textbox for custom input
text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
type="filepath")
music_output = gr.Audio(label="Generated Music", type="filepath")
with gr.Row():
button = gr.Button("Text to Music")
button.click(run_text2music,
inputs=[text_input, model_name,
chorus,
output_sample_rate,
max_generate_audio_seconds],
outputs=music_output)
generate_button = gr.Button("Music Continuation")
generate_button.click(run_continuation,
inputs=[text_input, audio_input, model_name,
chorus,
output_sample_rate,
max_generate_audio_seconds],
outputs=music_output)
with gr.Column():
default_prompt_buttons = []
for prompt in default_prompts:
button = gr.Button(value=prompt)
button.click(run_text2music,
inputs=[text_input, model_name,
chorus,
output_sample_rate,
max_generate_audio_seconds],
outputs=music_output)
default_prompt_buttons.append(button)
demo.launch()
if __name__ == '__main__':
    # Download every model's weights from ModelScope before launching the UI
    model_list = ["InspireMusic-Base", "InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-Base-24kHz", "InspireMusic-1.5B-24kHz"]
    for model_name in model_list:
        model_dir = f"pretrained_models/{model_name}"
        if model_name == "InspireMusic-Base":
            # The Base checkpoint is published under the plain "iic/InspireMusic" repo id
            snapshot_download("iic/InspireMusic", local_dir=model_dir)
        else:
            snapshot_download(f"iic/{model_name}", local_dir=model_dir)
    main()