chong.zhang committed on
Commit
b3d320b
·
1 Parent(s): e35a9be
Files changed (1) hide show
  1. app.py +34 -50
app.py CHANGED
@@ -39,6 +39,8 @@ import hashlib
39
  import importlib
40
 
41
  MODELS = ["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"]
 
 
42
 
43
  def generate_filename():
44
  hash_object = hashlib.sha256(str(int(datetime.datetime.now().timestamp())).encode())
@@ -70,7 +72,7 @@ def get_args(
70
  "max_audio_prompt_length": 5.0,
71
  "model_dir" : os.path.join("pretrained_models",
72
  model_name),
73
- "result_dir" : "exp/inspiremusic",
74
  "output_fn" : generate_filename(),
75
  "format" : "wav",
76
  "time_start" : time_start,
@@ -90,7 +92,7 @@ def trim_audio(audio_file, cut_seconds=5):
90
  audio, sr = torchaudio.load(audio_file)
91
  num_samples = cut_seconds * sr
92
  cutted_audio = audio[:, :num_samples]
93
- output_path = os.path.join(os.getcwd(), "audio_prompt_" + generate_filename() + ".wav")
94
  torchaudio.save(output_path, cutted_audio, sr)
95
  return output_path
96
 
@@ -158,7 +160,9 @@ def main():
158
  """)
159
 
160
  with gr.Row(equal_height=True):
161
- model_name = gr.Dropdown(MODELS, label="Select Model Name", value="InspireMusic-1.5B-Long")
 
 
162
  chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
163
  label="Chorus Mode", value="intro")
164
  output_sample_rate = gr.Dropdown([48000, 24000],
@@ -168,55 +172,35 @@ def main():
168
  label="Generate Audio Length (s)",
169
  value=30)
170
 
171
- # with gr.Row(equal_height=True):
172
- text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
173
- music_output = gr.Audio(label="Text to Music Output", type="filepath", autoplay=True)
174
-
175
- button = gr.Button("Text to Music")
176
- button.click(demo_inspiremusic_t2m,
177
- inputs=[text_input, model_name,
178
- chorus,
179
- output_sample_rate,
180
- max_generate_audio_seconds],
181
- outputs=music_output)
182
-
183
- audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
184
- type="filepath")
185
-
186
- music_con_output = gr.Audio(label="Music Continuation Output", type="filepath", autoplay=True)
187
- generate_button = gr.Button("Music Continuation")
188
- generate_button.click(demo_inspiremusic_con,
189
- inputs=[text_input, audio_input, model_name,
190
- chorus,
191
- output_sample_rate,
192
- max_generate_audio_seconds],
193
- outputs=music_con_output)
194
-
195
- # t2m_demo = gr.Interface(fn=demo_inspiremusic_t2m,
196
- # inputs=[
197
- # gr.Textbox(
198
- # label="Input Text (For Text-to-Music Task)",
199
- # value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
200
- # ],
201
- # outputs=[
202
- # gr.Audio(label="Output Audio", type="filepath", autoplay=True),
203
- # ],
204
- # title="InspireMusic",
205
- # description=("test"),
206
- # article=(
207
- # "<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement (ICASSP 2022)</a> </p>"
208
- # "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"
209
- # "<p style='text-align: center'><a href='https://arxiv.org/abs/2501.10045' target='_blank'>HiFi-SR: A Unified Generative Transformer-Convolutional Adversarial Network for High-Fidelity Speech Super-Resolution (ICASSP 2025)</a> </p>"),
210
- # examples=[
211
- # ["examples/sample.wav", True],
212
- # ],
213
- # cache_examples=True)
214
- # with gr.Blocks():
215
- # gr.TabbedInterface([demo, t2m_demo],
216
- # ["Task 1",
217
- # "Task 2"])
218
 
219
  demo.launch()
220
 
221
  if __name__ == '__main__':
 
 
222
  main()
 
39
  import importlib
40
 
41
  MODELS = ["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"]
42
+ AUDIO_PROMPT_DIR = "audio_prompts"
43
+ OUTPUT_AUDIO_DIR = "demo/outputs"
44
 
45
  def generate_filename():
46
  hash_object = hashlib.sha256(str(int(datetime.datetime.now().timestamp())).encode())
 
72
  "max_audio_prompt_length": 5.0,
73
  "model_dir" : os.path.join("pretrained_models",
74
  model_name),
75
+ "result_dir" : OUTPUT_AUDIO_DIR,
76
  "output_fn" : generate_filename(),
77
  "format" : "wav",
78
  "time_start" : time_start,
 
92
  audio, sr = torchaudio.load(audio_file)
93
  num_samples = cut_seconds * sr
94
  cutted_audio = audio[:, :num_samples]
95
+ output_path = os.path.join(AUDIO_PROMPT_DIR, "audio_prompt_" + generate_filename() + ".wav")
96
  torchaudio.save(output_path, cutted_audio, sr)
97
  return output_path
98
 
 
160
  """)
161
 
162
  with gr.Row(equal_height=True):
163
+ model_name = gr.Dropdown(
164
+ MODELS, label="Select Model Name",
165
+ value="InspireMusic-1.5B-Long")
166
  chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
167
  label="Chorus Mode", value="intro")
168
  output_sample_rate = gr.Dropdown([48000, 24000],
 
172
  label="Generate Audio Length (s)",
173
  value=30)
174
 
175
+ with gr.Row(equal_height=True):
176
+ text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)",
177
+ value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
178
+
179
+ audio_input = gr.Audio(
180
+ label="Input Audio Prompt (For Music Continuation Task)",
181
+ type="filepath")
182
+ music_output = gr.Audio(label="Generated Music", type="filepath", autoplay=True)
183
+
184
+ with gr.Row():
185
+ button = gr.Button("Text to Music")
186
+ button.click(demo_inspiremusic_t2m,
187
+ inputs=[text_input, model_name,
188
+ chorus,
189
+ output_sample_rate,
190
+ max_generate_audio_seconds],
191
+ outputs=music_output)
192
+
193
+ generate_button = gr.Button("Music Continuation")
194
+ generate_button.click(demo_inspiremusic_con,
195
+ inputs=[text_input, audio_input, model_name,
196
+ chorus,
197
+ output_sample_rate,
198
+ max_generate_audio_seconds],
199
+ outputs=music_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  demo.launch()
202
 
203
  if __name__ == '__main__':
204
+ os.makedirs(AUDIO_PROMPT_DIR, exist_ok=True)
205
+ os.makedirs(OUTPUT_AUDIO_DIR, exist_ok=True)
206
  main()