Spaces: Running on Zero

chong.zhang committed · b3d320b
Parent(s): e35a9be
update

app.py CHANGED
@@ -39,6 +39,8 @@ import hashlib
 import importlib
 
 MODELS = ["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"]
+AUDIO_PROMPT_DIR = "audio_prompts"
+OUTPUT_AUDIO_DIR = "demo/outputs"
 
 def generate_filename():
     hash_object = hashlib.sha256(str(int(datetime.datetime.now().timestamp())).encode())
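The helper shown in context hashes the current Unix timestamp to name output files. Here is a minimal self-contained sketch, assuming the function returns the hex digest (the return statement is not visible in this diff):

import datetime
import hashlib

def generate_filename():
    # Hash the current Unix timestamp (whole seconds) into a
    # filesystem-safe, fixed-length name.
    hash_object = hashlib.sha256(str(int(datetime.datetime.now().timestamp())).encode())
    return hash_object.hexdigest()  # assumed; the return is not shown in the diff

print(generate_filename())  # e.g. '5df6e0e2761359d3...'

Because the timestamp is truncated to whole seconds, two calls within the same second produce identical names; that is acceptable for a demo Space but worth keeping in mind.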
@@ -70,7 +72,7 @@ def get_args(
         "max_audio_prompt_length": 5.0,
         "model_dir" : os.path.join("pretrained_models",
                                    model_name),
-        "result_dir" :
+        "result_dir" : OUTPUT_AUDIO_DIR,
         "output_fn" : generate_filename(),
         "format" : "wav",
         "time_start" : time_start,
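This hunk pins the previously unset result_dir to the new OUTPUT_AUDIO_DIR constant. A hypothetical sketch of how the three fields above typically combine into a final output path — the actual assembly happens inside InspireMusic's inference code, not in this diff:

import os

OUTPUT_AUDIO_DIR = "demo/outputs"  # constant introduced by this commit

args = {
    "result_dir": OUTPUT_AUDIO_DIR,
    "output_fn": "5df6e0e2",  # placeholder for a generate_filename() value
    "format": "wav",
}
# Assumed combination: <result_dir>/<output_fn>.<format>
output_path = os.path.join(args["result_dir"], args["output_fn"] + "." + args["format"])
print(output_path)  # demo/outputs/5df6e0e2.wav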
@@ -90,7 +92,7 @@ def trim_audio(audio_file, cut_seconds=5):
     audio, sr = torchaudio.load(audio_file)
     num_samples = cut_seconds * sr
     cutted_audio = audio[:, :num_samples]
-    output_path = os.path.join(
+    output_path = os.path.join(AUDIO_PROMPT_DIR, "audio_prompt_" + generate_filename() + ".wav")
     torchaudio.save(output_path, cutted_audio, sr)
     return output_path
 
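Assembled into a runnable unit with the imports it needs, the patched trim_audio reads as follows (generate_filename is the timestamp-hash helper sketched earlier):

import os
import torchaudio

AUDIO_PROMPT_DIR = "audio_prompts"  # constant introduced by this commit

def trim_audio(audio_file, cut_seconds=5):
    # Keep only the first cut_seconds of the uploaded prompt.
    audio, sr = torchaudio.load(audio_file)  # audio shape: (channels, samples)
    num_samples = cut_seconds * sr
    cutted_audio = audio[:, :num_samples]
    # Save the trimmed prompt under a hashed, timestamp-derived name inside
    # AUDIO_PROMPT_DIR, which the __main__ block below now creates up front.
    # generate_filename() is the helper sketched above.
    output_path = os.path.join(AUDIO_PROMPT_DIR, "audio_prompt_" + generate_filename() + ".wav")
    torchaudio.save(output_path, cutted_audio, sr)
    return output_path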
@@ -158,7 +160,9 @@ def main():
         """)
 
         with gr.Row(equal_height=True):
-            model_name = gr.Dropdown(
+            model_name = gr.Dropdown(
+                MODELS, label="Select Model Name",
+                value="InspireMusic-1.5B-Long")
             chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
                                  label="Chorus Mode", value="intro")
             output_sample_rate = gr.Dropdown([48000, 24000],
@@ -168,55 +172,35 @@ def main():
                 label="Generate Audio Length (s)",
                 value=30)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # inputs=[
-            #     gr.Textbox(
-            #         label="Input Text (For Text-to-Music Task)",
-            #         value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
-            # ],
-            # outputs=[
-            #     gr.Audio(label="Output Audio", type="filepath", autoplay=True),
-            # ],
-            # title="InspireMusic",
-            # description=("test"),
-            # article=(
-            #     "<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement (ICASSP 2022)</a> </p>"
-            #     "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation (ICASSP 2024)</a> </p>"
-            #     "<p style='text-align: center'><a href='https://arxiv.org/abs/2501.10045' target='_blank'>HiFi-SR: A Unified Generative Transformer-Convolutional Adversarial Network for High-Fidelity Speech Super-Resolution (ICASSP 2025)</a> </p>"),
-            # examples=[
-            #     ["examples/sample.wav", True],
-            # ],
-            # cache_examples=True)
-        # with gr.Blocks():
-        #     gr.TabbedInterface([demo, t2m_demo],
-        #                        ["Task 1",
-        #                         "Task 2"])
+        with gr.Row(equal_height=True):
+            text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)",
+                                    value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
+
+            audio_input = gr.Audio(
+                label="Input Audio Prompt (For Music Continuation Task)",
+                type="filepath")
+            music_output = gr.Audio(label="Generated Music", type="filepath", autoplay=True)
+
+        with gr.Row():
+            button = gr.Button("Text to Music")
+            button.click(demo_inspiremusic_t2m,
+                         inputs=[text_input, model_name,
+                                 chorus,
+                                 output_sample_rate,
+                                 max_generate_audio_seconds],
+                         outputs=music_output)
+
+            generate_button = gr.Button("Music Continuation")
+            generate_button.click(demo_inspiremusic_con,
+                                  inputs=[text_input, audio_input, model_name,
+                                          chorus,
+                                          output_sample_rate,
+                                          max_generate_audio_seconds],
+                                  outputs=music_output)
 
     demo.launch()
 
 if __name__ == '__main__':
+    os.makedirs(AUDIO_PROMPT_DIR, exist_ok=True)
+    os.makedirs(OUTPUT_AUDIO_DIR, exist_ok=True)
     main()
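The wiring pattern this last hunk introduces — two buttons driving one shared output component — is easiest to see in isolation. Below is a minimal, runnable sketch with stub handlers; the real app binds demo_inspiremusic_t2m and demo_inspiremusic_con, and the component type of max_generate_audio_seconds plus any labels not shown in the diff are assumptions:

import os
import gradio as gr

AUDIO_PROMPT_DIR = "audio_prompts"
OUTPUT_AUDIO_DIR = "demo/outputs"

def fake_t2m(text, model, chorus, sr, seconds):
    # Stub: the real handler generates music and returns a .wav filepath.
    return None

def fake_continuation(text, audio, model, chorus, sr, seconds):
    # Stub: the real handler continues the audio prompt.
    return None

with gr.Blocks() as demo:
    with gr.Row(equal_height=True):
        model_name = gr.Dropdown(["InspireMusic-1.5B-Long"], label="Select Model Name",
                                 value="InspireMusic-1.5B-Long")
        chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
                             label="Chorus Mode", value="intro")
        output_sample_rate = gr.Dropdown([48000, 24000], value=48000,
                                         label="Output Sample Rate")  # label assumed
        max_generate_audio_seconds = gr.Number(label="Generate Audio Length (s)",
                                               value=30)  # component type assumed

    with gr.Row(equal_height=True):
        text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)")
        audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
                               type="filepath")
        music_output = gr.Audio(label="Generated Music", type="filepath", autoplay=True)

    with gr.Row():
        # Both buttons write to the same output component, so whichever
        # handler finishes last fills the audio player.
        gr.Button("Text to Music").click(
            fake_t2m,
            inputs=[text_input, model_name, chorus, output_sample_rate,
                    max_generate_audio_seconds],
            outputs=music_output)
        gr.Button("Music Continuation").click(
            fake_continuation,
            inputs=[text_input, audio_input, model_name, chorus,
                    output_sample_rate, max_generate_audio_seconds],
            outputs=music_output)

if __name__ == '__main__':
    os.makedirs(AUDIO_PROMPT_DIR, exist_ok=True)   # directories the real app
    os.makedirs(OUTPUT_AUDIO_DIR, exist_ok=True)   # now creates before launch
    demo.launch()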