chong.zhang committed on
Commit
d896a67
·
1 Parent(s): 489e7d6
Files changed (1)
  1. app.py +67 -61
app.py CHANGED
@@ -6,8 +6,7 @@ import torchaudio
6
  import datetime
7
  import hashlib
8
  import torch
9
- import importlib
10
- os.system('nvidia-smi')
11
  print(torch.backends.cudnn.version())
12
 
13
  def generate_filename():
@@ -126,63 +125,70 @@ def run_continuation(text, audio, model_name, chorus,
126
  max_generate_audio_seconds=max_generate_audio_seconds)
127
  return music_generation(args)
128
 
129
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
130
- gr.Markdown("""
131
- # InspireMusic
132
- - Support text-to-music, music continuation, audio super-resolution, audio reconstruction tasks with high audio quality, with available sampling rates of 24kHz, 48kHz.
133
- - Support long audio generation in multiple output audio formats, i.e., wav, flac, mp3, m4a.
134
- - Open-source [InspireMusic-Base](https://modelscope.cn/models/iic/InspireMusic/summary), [InspireMusic-Base-24kHz](https://modelscope.cn/models/iic/InspireMusic-Base-24kHz/summary), [InspireMusic-1.5B](https://modelscope.cn/models/iic/InspireMusic-1.5B/summary), [InspireMusic-1.5B-24kHz](https://modelscope.cn/models/iic/InspireMusic-1.5B-24kHz/summary), [InspireMusic-1.5B-Long](https://modelscope.cn/models/iic/InspireMusic-1.5B-Long/summary) models for music generation.
135
- - Currently only support English text prompts.
136
- """)
137
-
138
- with gr.Row(equal_height=True):
139
- model_name = gr.Dropdown(["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"], label="Select Model Name", value="InspireMusic-Base")
140
- chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
141
- label="Chorus Mode", value="intro")
142
- output_sample_rate = gr.Dropdown([48000, 24000],
143
- label="Output Audio Sample Rate (Hz)",
144
- value=48000)
145
- max_generate_audio_seconds = gr.Slider(10, 120,
146
- label="Generate Audio Length (s)",
147
- value=30)
148
- # with gr.Column():
149
- # fast = gr.Checkbox(label="Fast Inference", value=False)
150
- # fade_out = gr.Checkbox(label="Apply Fade Out Effect", value=True)
151
-
152
- with gr.Row(equal_height=True):
153
- # Textbox for custom input
154
- text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
155
-
156
- audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
157
- type="filepath")
158
- music_output = gr.Audio(label="Generated Music", type="filepath")
159
-
160
- with gr.Row():
161
- button = gr.Button("Text to Music")
162
- button.click(run_text2music,
163
- inputs=[text_input, model_name,
164
- chorus,
165
- output_sample_rate,
166
- max_generate_audio_seconds],
167
- outputs=music_output)
168
-
169
- generate_button = gr.Button("Music Continuation")
170
- generate_button.click(run_continuation,
171
- inputs=[text_input, audio_input, model_name,
172
- chorus,
173
- output_sample_rate,
174
- max_generate_audio_seconds],
175
- outputs=music_output)
176
-
177
- with gr.Column():
178
- default_prompt_buttons = []
179
- for prompt in default_prompts:
180
- button = gr.Button(value=prompt)
181
  button.click(run_text2music,
182
- inputs=[text_input, model_name,
183
- chorus,
184
- output_sample_rate,
185
- max_generate_audio_seconds],
186
- outputs=music_output)
187
- default_prompt_buttons.append(button)
188
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import datetime
7
  import hashlib
8
  import torch
9
+ from modelscope import snapshot_download
 
10
  print(torch.backends.cudnn.version())
11
 
12
  def generate_filename():
 
125
  max_generate_audio_seconds=max_generate_audio_seconds)
126
  return music_generation(args)
127
 
128
+ def main():
129
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
130
+ gr.Markdown("""
131
+ # InspireMusic
132
+ - Support text-to-music, music continuation, audio super-resolution, audio reconstruction tasks with high audio quality, with available sampling rates of 24kHz, 48kHz.
133
+ - Support long audio generation in multiple output audio formats, i.e., wav, flac, mp3, m4a.
134
+ - Open-source [InspireMusic-Base](https://modelscope.cn/models/iic/InspireMusic/summary), [InspireMusic-Base-24kHz](https://modelscope.cn/models/iic/InspireMusic-Base-24kHz/summary), [InspireMusic-1.5B](https://modelscope.cn/models/iic/InspireMusic-1.5B/summary), [InspireMusic-1.5B-24kHz](https://modelscope.cn/models/iic/InspireMusic-1.5B-24kHz/summary), [InspireMusic-1.5B-Long](https://modelscope.cn/models/iic/InspireMusic-1.5B-Long/summary) models for music generation.
135
+ - Currently only support English text prompts.
136
+ """)
137
+
138
+ with gr.Row(equal_height=True):
139
+ model_name = gr.Dropdown(["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"], label="Select Model Name", value="InspireMusic-Base")
140
+ chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
141
+ label="Chorus Mode", value="intro")
142
+ output_sample_rate = gr.Dropdown([48000, 24000],
143
+ label="Output Audio Sample Rate (Hz)",
144
+ value=48000)
145
+ max_generate_audio_seconds = gr.Slider(10, 120,
146
+ label="Generate Audio Length (s)",
147
+ value=30)
148
+
149
+ with gr.Row(equal_height=True):
150
+ # Textbox for custom input
151
+ text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
152
+
153
+ audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
154
+ type="filepath")
155
+ music_output = gr.Audio(label="Generated Music", type="filepath")
156
+
157
+ with gr.Row():
158
+ button = gr.Button("Text to Music")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  button.click(run_text2music,
160
+ inputs=[text_input, model_name,
161
+ chorus,
162
+ output_sample_rate,
163
+ max_generate_audio_seconds],
164
+ outputs=music_output)
165
+
166
+ generate_button = gr.Button("Music Continuation")
167
+ generate_button.click(run_continuation,
168
+ inputs=[text_input, audio_input, model_name,
169
+ chorus,
170
+ output_sample_rate,
171
+ max_generate_audio_seconds],
172
+ outputs=music_output)
173
+
174
+ with gr.Column():
175
+ default_prompt_buttons = []
176
+ for prompt in default_prompts:
177
+ button = gr.Button(value=prompt)
178
+ button.click(run_text2music,
179
+ inputs=[text_input, model_name,
180
+ chorus,
181
+ output_sample_rate,
182
+ max_generate_audio_seconds],
183
+ outputs=music_output)
184
+ default_prompt_buttons.append(button)
185
+ demo.launch()
186
+
187
+ if __name__ == '__main__':
188
+ model_list = ["InspireMusic-Base"]
189
+ for model_name in model_list:
190
+ if model_name == "InspireMusic-Base":
191
+ snapshot_download(f"iic/InspireMusic", local_dir=model_dir)
192
+ else:
193
+ snapshot_download(f"iic/{model_name}", local_dir=model_dir)
194
+ main()