Surn committed on
Commit 75e9a90 · 1 Parent(s): 0b202ef

User History Update

Files changed (2)
  1. app.py +54 -60
  2. audiocraft/models/lm.py +1 -1
app.py CHANGED
@@ -32,7 +32,7 @@ import librosa
 import modules.user_history
 from modules.version_info import versions_html, commit_hash, get_xformers_version
 from modules.gradio import *
-from modules.file_utils import get_file_parts, get_filename_from_filepath, convert_title_to_filename, get_filename, delete_file
+from modules.file_utils import get_file_parts, get_filename_from_filepath, convert_title_to_filename, get_unique_file_path, delete_file
 
 MODEL = None
 MODELS = None
@@ -243,7 +243,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
     try:
         if melody and ("melody" in model):
             # return excess duration, load next model and continue in loop structure building up output_segments
-            if duration > MODEL.lm.cfg.dataset.segment_duration:
+            if duration > segment_duration:
                 output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only, progress=gr.Progress(track_tqdm=True))
             else:
                 # pure original code
@@ -323,66 +323,66 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
     output = output.detach().cpu().float()[0]
 
     title_file_name = convert_title_to_filename(title)
-    with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix = title_file_name) as file:
+    with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
         video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
         if include_settings or include_title:
-            background = add_settings_to_image(title if include_title else "", video_description if include_settings else "", background_path=background, font=settings_font, font_color=settings_font_color)
+            background = add_settings_to_image(title if include_title else "",video_description if include_settings else "",background_path=background,font=settings_font,font_color=settings_font_color)
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
-        waveform_video_path = get_waveform(file.name,bg_image=background, bar_count=45, name = title_file_name)
+        waveform_video_path = get_waveform(file.name, bg_image=background, bar_count=45, name=title_file_name)
         # Remove the extension from file.name
        file_name_without_extension = os.path.splitext(file.name)[0]
         # Get the directory, filename, name, extension, and new extension of the waveform video path
         video_dir, video_name, video_name, video_ext, video_new_ext = get_file_parts(waveform_video_path)
 
-        new_video_path = os.path.join(video_dir, title_file_name + video_new_ext)
-
+        new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)
+
         mp4 = MP4(waveform_video_path)
         mp4["©nam"] = title_file_name # Title tag
         mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}" # Description tag
 
         commit = commit_hash()
-        metadata={
-            "prompt": text,
-            "negative_prompt": "",
-            "Seed": seed,
-            "steps": 1,
-            "width": "768px",
-            "height":"512px",
-            "Dimension": dimension,
-            "Top-k": topk,
-            "Top-p":topp,
-            "Randomness": temperature,
-            "cfg":cfg_coef,
-            "overlap": overlap,
-            "Melody Condition": melody_name,
-            "Sample Segment": prompt_index,
-            "Duration": initial_duration,
-            "Audio": file.name,
-            "font": settings_font,
-            "font_color": settings_font_color,
-            "harmony_only": harmony_only,
-            "background": background,
-            "include_title": include_title,
-            "include_settings": include_settings,
-            "profile": "Satoshi Nakamoto" if profile.value is None else profile.value.username,
-            "commit": commit_hash(),
-            "tag": git_tag(),
-            "version": gr.__version__,
-            "model_version": MODEL.version,
-            "model_name": MODEL.name,
-            "model_description": f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz",
-            "melody_name" : melody_name if melody_name else "",
-            "melody_extension" : melody_extension if melody_extension else "",
-            "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
-            "version" : f"""https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{"huggingface" if commit == "<none>" else commit}""",
-            "python" : sys.version,
-            "torch" : getattr(torch, '__long_version__',torch.__version__),
-            "xformers": get_xformers_version(),
-            "gradio": gr.__version__,
-            "huggingface_space": os.environ.get('SPACE_ID', ''),
-            "CUDA": f"""{"CUDA is available. device: " + torch.cuda.get_device_name(0) + " version: " + torch.version.cuda if torch.cuda.is_available() else "CUDA is not available."}""",
+        metadata = {
+            "prompt": text,
+            "negative_prompt": "",
+            "Seed": seed,
+            "steps": 1,
+            "width": "768px",
+            "height": "512px",
+            "Dimension": dimension,
+            "Top-k": topk,
+            "Top-p": topp,
+            "Randomness": temperature,
+            "cfg": cfg_coef,
+            "overlap": overlap,
+            "Melody Condition": melody_name,
+            "Sample Segment": prompt_index,
+            "Duration": initial_duration,
+            "Audio": file.name,
+            "font": settings_font,
+            "font_color": settings_font_color,
+            "harmony_only": harmony_only,
+            "background": background,
+            "include_title": include_title,
+            "include_settings": include_settings,
+            "profile": "Satoshi Nakamoto" if profile.value is None else profile.value.username,
+            "commit": commit_hash(),
+            "tag": git_tag(),
+            "version": gr.__version__,
+            "model_version": MODEL.version,
+            "model_name": MODEL.name,
+            "model_description": f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz",
+            "melody_name": melody_name if melody_name else "",
+            "melody_extension": melody_extension if melody_extension else "",
+            "hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
+            "version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '<none>' else commit}",
+            "python": sys.version,
+            "torch": getattr(torch, '__long_version__', torch.__version__),
+            "xformers": get_xformers_version(),
+            "gradio": gr.__version__,
+            "huggingface_space": os.environ.get('SPACE_ID', ''),
+            "CUDA": f"{'CUDA is available. device: ' + torch.cuda.get_device_name(0) + ' version: ' + torch.version.cuda if torch.cuda.is_available() else 'CUDA is not available.'}",
         }
         # Add additional metadata from the metadata dictionary (if it exists)
         for key, value in metadata.items():
@@ -392,16 +392,10 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
         mp4.save()
 
         try:
-            if os.path.exists(new_video_path):
-                delete_file(new_video_path)
-            # Open the original MP4 file in binary read mode and the new file in binary write mode
-            with open(waveform_video_path, "rb") as src, open(new_video_path, "wb") as dst:
-                if os.path.exists(waveform_video_path):
-                    # Copy the contents from the source file to the destination file
-                    shutil.copyfileobj(src, dst)
-                waveform_video_path = new_video_path
+            os.replace(waveform_video_path, new_video_path)
+            waveform_video_path = new_video_path
         except Exception as e:
-            print(f"Error copying file: {e}")
+            print(f"Error renaming file: {e}")
 
         if waveform_video_path:
             modules.user_history.save_file(
@@ -454,7 +448,7 @@ def ui(**kwargs):
         text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
         autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
         with gr.Column():
-            duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration")
+            duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration", step=1)
            model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True, key="chosen_model")
        with gr.Row():
            submit = gr.Button("Generate", elem_id="btn-generate")
@@ -479,14 +473,14 @@ def ui(**kwargs):
        settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
        with gr.Accordion("Expert", open=False):
            with gr.Row():
-               segment_length = gr.Slider(minimum=10, maximum=30, value=30, step =1,label="Music Generation Segment Length (s)", interactive=True,key="segment_length")
+               segment_length = gr.Slider(minimum=10, maximum=30, value=30, step=1,label="Music Generation Segment Length (s)", interactive=True,key="segment_length")
                overlap = gr.Slider(minimum=0, maximum=15, value=1, step=1, label="Segment Overlap", interactive=True)
                dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
            with gr.Row():
                topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
                topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
-               temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
-               cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
+               temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, step=0.1, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
+               cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, step=0.1, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
            with gr.Row():
                seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
                gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
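Note on the app.py rename handling above: the target name for the waveform video now comes from get_unique_file_path instead of a bare os.path.join, and the old delete-then-copy sequence (delete_file plus shutil.copyfileobj) collapses into a single os.replace, which moves the file in one operation and overwrites any existing destination. Only the helper's call site appears in this commit; the sketch below is a hypothetical implementation matching that call signature, assuming video_new_ext carries the leading dot returned by get_file_parts — the real modules.file_utils code may differ.

import os

def get_unique_file_path(directory: str, base_name: str, extension: str) -> str:
    # Hypothetical sketch of the helper imported from modules.file_utils.
    # Appends an incrementing suffix until the candidate path is unused;
    # `extension` is assumed to include the leading dot (video_new_ext).
    candidate = os.path.join(directory, f"{base_name}{extension}")
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{base_name}_{counter}{extension}")
        counter += 1
    return candidate

# Usage mirroring the new call site, followed by the rename that replaces
# the previous delete/copy sequence:
#   new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)
#   os.replace(waveform_video_path, new_video_path)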
audiocraft/models/lm.py CHANGED
@@ -463,7 +463,7 @@ class LMModel(StreamingModule):
         B, K, T = prompt.shape
         start_offset = T
         print(f"start_offset: {start_offset} | max_gen_len: {max_gen_len}")
-        assert start_offset < max_gen_len
+        assert start_offset <= max_gen_len
 
         pattern = self.pattern_provider.get_pattern(max_gen_len)
         # this token is used as default value for codes that are not generated yet
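The lm.py change relaxes the pre-generation assertion from a strict to a non-strict inequality, so a prompt whose frame count already equals max_gen_len no longer aborts generation — a boundary that can arise when a conditioning segment spans the full generation window. A standalone illustration of that boundary case (not the library code; the codebook count of 4 is only an example):

import torch

# Illustrative only: a prompt length T equal to max_gen_len now passes the check.
max_gen_len = 1500                        # frames requested for this segment
prompt = torch.zeros(1, 4, max_gen_len)   # [B, K, T]: prompt fills the whole window
B, K, T = prompt.shape
start_offset = T

assert start_offset <= max_gen_len        # passes; the previous strict `<` raised here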