chong.zhang committed on
Commit
acf41f6
·
1 Parent(s): 4e0151a

add wavtokenizer version

Browse files
Files changed (2):
  1. app.py +25 -25
  2. inspiremusic/cli/inference.py +5 -0
app.py CHANGED
@@ -170,7 +170,7 @@ def main():
170
 
171
  # with gr.Row(equal_height=True):
172
  text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
173
- music_output = gr.Audio(label="Text to Music Output", type="filepath")
174
 
175
  button = gr.Button("Text to Music")
176
  button.click(demo_inspiremusic_t2m,
@@ -183,7 +183,7 @@ def main():
183
  audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
184
  type="filepath")
185
 
186
- music_con_output = gr.Audio(label="Music Continuation Output", type="filepath")
187
  generate_button = gr.Button("Music Continuation")
188
  generate_button.click(demo_inspiremusic_con,
189
  inputs=[text_input, audio_input, model_name,
@@ -192,29 +192,29 @@ def main():
192
  max_generate_audio_seconds],
193
  outputs=music_con_output)
194
 
195
- t2m_demo = gr.Interface(fn=demo_inspiremusic_t2m,
196
- inputs=[
197
- gr.Textbox(
198
- label="Input Text (For Text-to-Music Task)",
199
- value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
200
- ],
201
- outputs=[
202
- gr.Audio(label="Output Audio", type="filepath"),
203
- ],
204
- title="InspireMusic",
205
- description=("test"),
206
- article=(
207
- "<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement (ICASSP 2022)</a> </p>"
208
- "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"
209
- "<p style='text-align: center'><a href='https://arxiv.org/abs/2501.10045' target='_blank'>HiFi-SR: A Unified Generative Transformer-Convolutional Adversarial Network for High-Fidelity Speech Super-Resolution (ICASSP 2025)</a> </p>"),
210
- examples=[
211
- ["examples/sample.wav", True],
212
- ],
213
- cache_examples=True)
214
- with gr.Blocks():
215
- gr.TabbedInterface([demo, t2m_demo],
216
- ["Task 1",
217
- "Task 2"])
218
 
219
  demo.launch()
220
 
 
170
 
171
  # with gr.Row(equal_height=True):
172
  text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
173
+ music_output = gr.Audio(label="Text to Music Output", type="filepath", autoplay=True)
174
 
175
  button = gr.Button("Text to Music")
176
  button.click(demo_inspiremusic_t2m,
 
183
  audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
184
  type="filepath")
185
 
186
+ music_con_output = gr.Audio(label="Music Continuation Output", type="filepath", autoplay=True)
187
  generate_button = gr.Button("Music Continuation")
188
  generate_button.click(demo_inspiremusic_con,
189
  inputs=[text_input, audio_input, model_name,
 
192
  max_generate_audio_seconds],
193
  outputs=music_con_output)
194
 
195
+ # t2m_demo = gr.Interface(fn=demo_inspiremusic_t2m,
196
+ # inputs=[
197
+ # gr.Textbox(
198
+ # label="Input Text (For Text-to-Music Task)",
199
+ # value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
200
+ # ],
201
+ # outputs=[
202
+ # gr.Audio(label="Output Audio", type="filepath", autoplay=True),
203
+ # ],
204
+ # title="InspireMusic",
205
+ # description=("test"),
206
+ # article=(
207
+ # "<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement (ICASSP 2022)</a> </p>"
208
+ # "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"
209
+ # "<p style='text-align: center'><a href='https://arxiv.org/abs/2501.10045' target='_blank'>HiFi-SR: A Unified Generative Transformer-Convolutional Adversarial Network for High-Fidelity Speech Super-Resolution (ICASSP 2025)</a> </p>"),
210
+ # examples=[
211
+ # ["examples/sample.wav", True],
212
+ # ],
213
+ # cache_examples=True)
214
+ # with gr.Blocks():
215
+ # gr.TabbedInterface([demo, t2m_demo],
216
+ # ["Task 1",
217
+ # "Task 2"])
218
 
219
  demo.launch()
220
 
inspiremusic/cli/inference.py CHANGED
@@ -196,6 +196,11 @@ class InspireMusicUnified:
196
 
197
  else:
198
  logging.error(f"Generated audio length is shorter than minimum required audio length.")
 
 
 
 
 
199
 
200
  def get_args():
201
  parser = argparse.ArgumentParser(description='Run inference with your model')
 
196
 
197
  else:
198
  logging.error(f"Generated audio length is shorter than minimum required audio length.")
199
+ if music_fn:
200
+ if os.path.exists(music_fn):
201
+ return music_fn
202
+ else:
203
+ logging.error(f"{music_fn} does not exist.")
204
 
205
  def get_args():
206
  parser = argparse.ArgumentParser(description='Run inference with your model')