Alexxggs committed
Commit 6424905 · 1 Parent(s): f4f2f9f

Update app.py

Files changed (1):
  1. app.py +68 -15
app.py CHANGED
@@ -138,22 +138,24 @@ def ui_full(launch_kwargs):
     with gr.Blocks() as interface:
         gr.Markdown(
             """
-
+            # MusicGen
+            This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
+            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
             """
         )
         with gr.Row():
             with gr.Column():
                 with gr.Row():
-                    text = gr.Text(label="Текст пример (bass drum cyberpunk)", interactive=True)
+                    text = gr.Text(label="Input Text", interactive=True)
                     melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
                 with gr.Row():
-                    submit = gr.Button("Создать")
-
-                    _ = gr.Button("Прервать").click(fn=interrupt, queue=False)
+                    submit = gr.Button("Submit")
+                    # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
+                    _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
                 with gr.Row():
-                    model = gr.Radio(["melody", "medium", "small", "large"], label="Тип трека", value="melody", interactive=True)
+                    model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
-                    duration = gr.Slider(minimum=1, maximum=120, value=10, label="Время трека(seconds)", interactive=True)
+                    duration = gr.Slider(minimum=1, maximum=120, value=10, label="Duration", interactive=True)
                 with gr.Row():
                     topk = gr.Number(label="Top-k", value=250, interactive=True)
                     topp = gr.Number(label="Top-p", value=0, interactive=True)
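The hunk above adds an Interrupt button wired to an `interrupt` handler (credited to rkfg's long-generation fork of audiocraft). The handler itself is not part of this diff, so the following is only a minimal sketch of one way such a hook is commonly built: a module-level flag that the button flips and the generation loop polls. The names `INTERRUPTING` and `fake_generation_loop` are illustrative assumptions, not code from the commit.

```python
# Hypothetical sketch compatible with gr.Button("Interrupt").click(fn=interrupt, queue=False).
# Only `interrupt` is referenced in the diff; everything else here is assumed.

INTERRUPTING = False  # module-level flag shared by the UI callback and the worker


def interrupt():
    """Called by the Interrupt button; flips the flag so the worker can bail out."""
    global INTERRUPTING
    INTERRUPTING = True


def fake_generation_loop(total_steps: int = 100):
    """Stand-in for the decoding loop: polls the flag between steps."""
    global INTERRUPTING
    INTERRUPTING = False
    for _ in range(total_steps):
        if INTERRUPTING:
            raise RuntimeError("Generation interrupted by the user.")
        # ... one decoding step would run here ...
```

`queue=False` lets the interrupt click run immediately instead of waiting in the request queue, which is what makes it useful while a long generation is still in progress.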
@@ -162,10 +164,58 @@ def ui_full(launch_kwargs):
             with gr.Column():
                 output = gr.Video(label="Generated Music")
         submit.click(predict_full, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
-
+        gr.Examples(
+            fn=predict_full,
+            examples=[
+                [
+                    "An 80s driving pop song with heavy drums and synth pads in the background",
+                    "./assets/bach.mp3",
+                    "melody"
+                ],
+                [
+                    "A cheerful country song with acoustic guitars",
+                    "./assets/bolero_ravel.mp3",
+                    "melody"
+                ],
+                [
+                    "90s rock song with electric guitar and heavy drums",
+                    None,
+                    "medium"
+                ],
+                [
+                    "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
+                    "./assets/bach.mp3",
+                    "melody"
+                ],
+                [
+                    "lofi slow bpm electro chill with organic samples",
+                    None,
+                    "medium",
+                ],
+            ],
+            inputs=[text, melody, model],
+            outputs=[output]
+        )
         gr.Markdown(
             """
-
+            ### More details
+            The model will generate a short music extract based on the description you provided.
+            The model can generate up to 30 seconds of audio in one pass. It is now possible
+            to extend the generation by feeding back the end of the previous chunk of audio.
+            This can take a long time, and the model might lose consistency. The model might also
+            decide at arbitrary positions that the song ends.
+            **WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min). An overlap of 12 seconds
+            is kept with the previously generated chunk, and 18 "new" seconds are generated each time.
+            We present 4 model variations:
+            1. Melody -- a music generation model capable of generating music conditioned on text and melody inputs. **Note**, you can also use text only.
+            2. Small -- a 300M transformer decoder conditioned on text only.
+            3. Medium -- a 1.5B transformer decoder conditioned on text only.
+            4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences.)
+            When using `melody`, you can optionally provide a reference audio from
+            which a broad melody will be extracted. The model will then try to follow both the description and melody provided.
+            You can also use your own GPU or a Google Colab by following the instructions on our repo.
+            See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
+            for more details.
             """
         )
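The hunk above wires the full UI: `submit.click` passes eight components to `predict_full`, and the new `gr.Examples` block pre-fills only `text`, `melody`, and `model`. `predict_full` itself lies outside this diff, so the sketch below only shows a signature that would be compatible with that wiring; the body and return value are placeholders. With `type="numpy"`, Gradio hands the melody over as a `(sample_rate, waveform)` tuple, or `None` when nothing was uploaded.

```python
# Sketch of a handler shape matching the click wiring shown above:
# submit.click(predict_full,
#              inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef],
#              outputs=[output])
# The real implementation is not part of this commit; the body is a placeholder.
from typing import Optional, Tuple

import numpy as np


def predict_full(model: str,                                # "melody" | "medium" | "small" | "large"
                 text: str,                                 # prompt from the gr.Text box
                 melody: Optional[Tuple[int, np.ndarray]],  # (sample_rate, waveform) from gr.Audio, or None
                 duration: float,                           # seconds, from the 1-120 slider
                 topk: int,
                 topp: float,
                 temperature: float,
                 cfg_coef: float) -> str:
    # ... load the requested MusicGen variant and generate `duration` seconds of audio here ...
    return "generated.mp4"  # gr.Video(label="Generated Music") accepts a file path
```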
 
@@ -176,13 +226,19 @@ def ui_batched(launch_kwargs):
     with gr.Blocks() as demo:
         gr.Markdown(
             """
-
+            # MusicGen
+            This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
+            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
+            <br/>
+            <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
+            <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+            for longer sequences, more control and no queue.</p>
             """
         )
         with gr.Row():
             with gr.Column():
                 with gr.Row():
-                    text = gr.Text(label="Describe your music", lines=2, interactive=True)
+                    text = gr.Text(label="Describe your music", lines=2, interactive=True)
                     melody = gr.Audio(source="upload", type="numpy", label="Condition on a melody (optional)", interactive=True)
                 with gr.Row():
                     submit = gr.Button("Generate")
@@ -218,14 +274,11 @@ def ui_batched(launch_kwargs):
         )
         gr.Markdown("""
         ### More details
-
         The model will generate 12 seconds of audio based on the description you provided.
         You can optionally provide a reference audio from which a broad melody will be extracted.
         The model will then try to follow both the description and melody provided.
         All samples are generated with the `melody` model.
-
         You can also use your own GPU or a Google Colab by following the instructions on our repo.
-
         See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
         for more details.
         """)
@@ -278,4 +331,4 @@ if __name__ == "__main__":
     if IS_BATCHED:
         ui_batched(launch_kwargs)
     else:
-        ui_full(launch_kwargs)
+        ui_full(launch_kwargs)
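The final hunk only touches the dispatch at the bottom of `app.py`. For context, here is a minimal sketch of what that entry point looks like: `launch_kwargs` is a dict of keyword arguments the UI builders forward to `launch(...)`, and `IS_BATCHED` picks between the two UIs. How `launch_kwargs` and `IS_BATCHED` are actually computed is not visible in this diff, so the argparse flags and the `SPACE_ID` check below are assumptions, and the two stubs stand in for the builders defined earlier in the file.

```python
# Minimal sketch of the entry point touched by the last hunk; flag names and the
# SPACE_ID check are assumptions, not code shown in this commit.
import argparse
import os

IS_BATCHED = "SPACE_ID" in os.environ  # assumed: use the batched UI on a hosted Space


def ui_full(launch_kwargs):      # stub for the full UI builder sketched above
    ...


def ui_batched(launch_kwargs):   # stub for the batched UI builder
    ...


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--listen", type=str, default="0.0.0.0", help="IP to listen on")
    parser.add_argument("--server_port", type=int, default=7860, help="Port for the Gradio server")
    args = parser.parse_args()

    # Keyword arguments ultimately forwarded to gr.Blocks.launch(**launch_kwargs)
    launch_kwargs = {"server_name": args.listen, "server_port": args.server_port}

    if IS_BATCHED:
        ui_batched(launch_kwargs)
    else:
        ui_full(launch_kwargs)
```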
 