Manjushri committed
Commit fe7ee5e · 1 Parent(s): d5d153a

Delete app.py

Files changed (1)
  1. app.py +0 -344
app.py DELETED
@@ -1,344 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
-
- # This source code is licensed under the license found in the
- # LICENSE file in the root directory of this source tree.
-
- # Updated to account for UI changes from https://github.com/rkfg/audiocraft/blob/long/app.py
- # also released under the MIT license.
-
- import argparse
- from concurrent.futures import ProcessPoolExecutor
- import os
- import subprocess as sp
- from tempfile import NamedTemporaryFile
- import time
- import warnings
- import modin.pandas as pd
- import torch
- import gradio as gr
-
- from audiocraft.data.audio_utils import convert_audio
- from audiocraft.data.audio import audio_write
- from audiocraft.models import MusicGen
-
-
- MODEL = None  # Last used model
- IS_BATCHED = "facebook/MusicGen" in os.environ.get('SPACE_ID', '')
- MAX_BATCH_SIZE = 6
- BATCHED_DURATION = 15
- INTERRUPTING = False
- # We have to wrap subprocess calls to clean up the log a bit when using gr.make_waveform
- _old_call = sp.call
-
-
- def _call_nostderr(*args, **kwargs):
-     # Avoid ffmpeg vomiting on the logs.
-     kwargs['stderr'] = sp.DEVNULL
-     kwargs['stdout'] = sp.DEVNULL
-     # Propagate the return code so callers of the patched sp.call still work.
-     return _old_call(*args, **kwargs)
-
-
- sp.call = _call_nostderr
- # Preallocating the pool of processes.
- pool = ProcessPoolExecutor(3)
- pool.__enter__()
-
-
- def interrupt():
-     global INTERRUPTING
-     INTERRUPTING = True
-
-
- def make_waveform(*args, **kwargs):
-     # Further remove some warnings.
-     be = time.time()
-     with warnings.catch_warnings():
-         warnings.simplefilter('ignore')
-         out = gr.make_waveform(*args, **kwargs)
-         print("Making the video took", time.time() - be)
-         return out
-
-
- def load_model(version='melody'):
-     global MODEL
-     print("Loading model", version)
-     if MODEL is None or MODEL.name != version:
-         MODEL = MusicGen.get_pretrained(version)
-
-
- def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
-     MODEL.set_generation_params(duration=duration, **gen_kwargs)
-     print("new batch", len(texts), texts, [None if m is None else (m[0], m[1].shape) for m in melodies])
-     be = time.time()
-     processed_melodies = []
-     target_sr = 32000
-     target_ac = 1
-     for melody in melodies:
-         if melody is None:
-             processed_melodies.append(None)
-         else:
-             sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t()
-             if melody.dim() == 1:
-                 melody = melody[None]
-             melody = melody[..., :int(sr * duration)]
-             melody = convert_audio(melody, sr, target_sr, target_ac)
-             processed_melodies.append(melody)
-
-     if any(m is not None for m in processed_melodies):
-         outputs = MODEL.generate_with_chroma(
-             descriptions=texts,
-             melody_wavs=processed_melodies,
-             melody_sample_rate=target_sr,
-             progress=progress,
-         )
-     else:
-         outputs = MODEL.generate(texts, progress=progress)
-
-     outputs = outputs.detach().cpu().float()
-     out_files = []
-     for output in outputs:
-         with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
-             audio_write(
-                 file.name, output, MODEL.sample_rate, strategy="loudness",
-                 loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-             out_files.append(pool.submit(make_waveform, file.name))
-     res = [out_file.result() for out_file in out_files]
-     print("batch finished", len(texts), time.time() - be)
-     return res
-
-
- def predict_batched(texts, melodies):
-     max_text_length = 512
-     texts = [text[:max_text_length] for text in texts]
-     load_model('melody')
-     res = _do_predictions(texts, melodies, BATCHED_DURATION)
-     return [res]
-
-
- def predict_full(model, text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
-     global INTERRUPTING
-     INTERRUPTING = False
-     topk = int(topk)
-     load_model(model)
-
-     def _progress(generated, to_generate):
-         progress((generated, to_generate))
-         if INTERRUPTING:
-             raise gr.Error("Interrupted.")
-     MODEL.set_custom_progress_callback(_progress)
-
-     outs = _do_predictions(
-         [text], [melody], duration, progress=True,
-         top_k=topk, top_p=topp, temperature=temperature, cfg_coef=cfg_coef)
-     return outs[0]
-
-
- def ui_full(launch_kwargs):
-     with gr.Blocks() as interface:
-         gr.Markdown(
-             """
-             # MusicGen
-             This is a demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
-             presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
-             """
-         )
-         with gr.Row():
-             with gr.Column():
-                 with gr.Row():
-                     text = gr.Text(label="Input Text", interactive=True)
-                     melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
-                 with gr.Row():
-                     submit = gr.Button("Submit")
-                     # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
-                     _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
-                 with gr.Row():
-                     model = gr.Radio(["melody", "large", "medium", "small"], label="Model", value="melody", interactive=True)
-                 with gr.Row():
-                     duration = gr.Slider(minimum=1, maximum=120, value=16, label="Duration", interactive=True)
-                 with gr.Row():
-                     topk = gr.Number(label="Top-k", value=250, interactive=True)
-                     topp = gr.Number(label="Top-p", value=0, interactive=True)
-                     temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
-                     cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
-             with gr.Column():
-                 output = gr.Video(label="Generated Music")
-         submit.click(predict_full, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
-         gr.Examples(
-             fn=predict_full,
-             examples=[
-                 [
-                     "An 80s driving pop song with heavy drums and synth pads in the background",
-                     "./bach.mp3",
-                     "melody"
-                 ],
-                 [
-                     "A cheerful country song with acoustic guitars",
-                     "./bolero_ravel.mp3",
-                     "melody"
-                 ],
-                 [
-                     "90s rock song with electric guitar and heavy drums",
-                     None,
-                     "medium"
-                 ],
-                 [
-                     "a light and cheerful EDM track, with syncopated drums, airy pads, and strong emotions",
-                     "./bach.mp3",
-                     "melody"
-                 ],
-                 [
-                     "lofi slow bpm electro chill with organic samples",
-                     None,
-                     "medium",
-                 ],
-             ],
-             inputs=[text, melody, model],
-             outputs=[output]
-         )
-         gr.Markdown(
-             """
-             ### More details
-
-             The model will generate a short music extract based on the description you provided.
-             The model can generate up to 30 seconds of audio in one pass. It is now possible
-             to extend the generation by feeding back the end of the previous chunk of audio.
-             This can take a long time, and the model might lose consistency. The model might also
-             decide at arbitrary positions that the song ends.
-
-             **WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min).
-             An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
-             are generated each time.
-
-             We present 4 model variations:
-             1. Melody -- a music generation model capable of generating music conditioned on text and melody inputs. **Note**, you can also use text only.
-             2. Small -- a 300M transformer decoder conditioned on text only.
-             3. Medium -- a 1.5B transformer decoder conditioned on text only.
-             4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences).
-
-             When using `melody`, you can optionally provide a reference audio from
-             which a broad melody will be extracted. The model will then try to follow both the description and melody provided.
-
-             You can also use your own GPU or a Google Colab by following the instructions on our repo.
-             See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
-             for more details.
-             """
-         )
-
-         interface.queue(max_size=2).launch(**launch_kwargs)
-
-
- def ui_batched(launch_kwargs):
-     with gr.Blocks() as demo:
-         gr.Markdown(
-             """
-             # MusicGen
-
-             This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
-             presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
-             <br/>
-             <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
-             <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-             for longer sequences, more control and no queue.
-             """
-         )
-         with gr.Row():
-             with gr.Column():
-                 with gr.Row():
-                     text = gr.Text(label="Describe your music", lines=2, interactive=True)
-                     melody = gr.Audio(source="upload", type="numpy", label="Condition on a melody (optional)", interactive=True)
-                 with gr.Row():
-                     submit = gr.Button("Generate")
-             with gr.Column():
-                 output = gr.Video(label="Generated Music")
-         submit.click(predict_batched, inputs=[text, melody], outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
-         gr.Examples(
-             fn=predict_batched,
-             examples=[
-                 [
-                     "An 80s driving pop song with heavy drums and synth pads in the background",
-                     "./assets/bach.mp3",
-                 ],
-                 [
-                     "A cheerful country song with acoustic guitars",
-                     "./assets/bolero_ravel.mp3",
-                 ],
-                 [
-                     "90s rock song with electric guitar and heavy drums",
-                     None,
-                 ],
-                 [
-                     "a light and cheerful EDM track, with syncopated drums, airy pads, and strong emotions bpm: 130",
-                     "./assets/bach.mp3",
-                 ],
-                 [
-                     "lofi slow bpm electro chill with organic samples",
-                     None,
-                 ],
-             ],
-             inputs=[text, melody],
-             outputs=[output]
-         )
-         gr.Markdown("""
-         ### More details
-
-         The model will generate 15 seconds of audio based on the description you provided.
-         You can optionally provide a reference audio from which a broad melody will be extracted.
-         The model will then try to follow both the description and melody provided.
-         All samples are generated with the `melody` model.
-
-         You can also use your own GPU or a Google Colab by following the instructions on our repo.
-
-         See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
-         for more details.
-         """)
-
-         demo.queue(max_size=3).launch(**launch_kwargs)
-
-
- if __name__ == "__main__":
-     parser = argparse.ArgumentParser()
-     parser.add_argument(
-         '--listen',
-         type=str,
-         default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
-         help='IP to listen on for connections to Gradio',
-     )
-     parser.add_argument(
-         '--username', type=str, default='', help='Username for authentication'
-     )
-     parser.add_argument(
-         '--password', type=str, default='', help='Password for authentication'
-     )
-     parser.add_argument(
-         '--server_port',
-         type=int,
-         default=0,
-         help='Port to run the server listener on',
-     )
-     parser.add_argument(
-         '--inbrowser', action='store_true', help='Open in browser'
-     )
-     parser.add_argument(
-         '--share', action='store_true', help='Share the Gradio UI'
-     )
-
-     args = parser.parse_args()
-
-     launch_kwargs = {}
-     launch_kwargs['server_name'] = args.listen
-
-     if args.username and args.password:
-         launch_kwargs['auth'] = (args.username, args.password)
-     if args.server_port:
-         launch_kwargs['server_port'] = args.server_port
-     if args.inbrowser:
-         launch_kwargs['inbrowser'] = args.inbrowser
-     if args.share:
-         launch_kwargs['share'] = args.share
-
-     # Show the interface
-     if IS_BATCHED:
-         ui_batched(launch_kwargs)
-     else:
-         ui_full(launch_kwargs)
-
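
For reference, the generation flow this app wrapped reduces to a few audiocraft calls. Below is a minimal sketch based on the deleted file above; the model choice ('small'), prompt, and output stem are illustrative, while MusicGen.get_pretrained, set_generation_params, generate, and audio_write are the same audiocraft APIs the app used:

from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write

# Load one of the checkpoints the UI exposed: 'melody', 'small', 'medium', 'large'.
model = MusicGen.get_pretrained('small')
# Same sampling defaults as the ui_full controls above.
model.set_generation_params(duration=16, top_k=250, top_p=0,
                            temperature=1.0, cfg_coef=3.0)
wavs = model.generate(['90s rock song with electric guitar and heavy drums'])
for idx, wav in enumerate(wavs.detach().cpu().float()):
    # Loudness-normalized write, as in _do_predictions (here keeping the default .wav suffix).
    audio_write(f'output_{idx}', wav, model.sample_rate, strategy='loudness',
                loudness_headroom_db=16, loudness_compressor=True)

The app itself was launched through the argparse block at the end, e.g. python app.py --listen 127.0.0.1 --server_port 7860 for a local run.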