Spaces:
Running
on
T4
Running
on
T4
User History Update
Browse files
- app.py +54 -60
- audiocraft/models/lm.py +1 -1
app.py
CHANGED
@@ -32,7 +32,7 @@ import librosa
|
|
32 |
import modules.user_history
|
33 |
from modules.version_info import versions_html, commit_hash, get_xformers_version
|
34 |
from modules.gradio import *
|
35 |
-
from modules.file_utils import get_file_parts, get_filename_from_filepath, convert_title_to_filename,
|
36 |
|
37 |
MODEL = None
|
38 |
MODELS = None
|
@@ -243,7 +243,7 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
243 |
try:
|
244 |
if melody and ("melody" in model):
|
245 |
# return excess duration, load next model and continue in loop structure building up output_segments
|
246 |
-
if duration >
|
247 |
output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only, progress=gr.Progress(track_tqdm=True))
|
248 |
else:
|
249 |
# pure original code
|
@@ -323,66 +323,66 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
323 |
output = output.detach().cpu().float()[0]
|
324 |
|
325 |
title_file_name = convert_title_to_filename(title)
|
326 |
-
with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix
|
327 |
video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
|
328 |
if include_settings or include_title:
|
329 |
-
background = add_settings_to_image(title if include_title else "",
|
330 |
audio_write(
|
331 |
file.name, output, MODEL.sample_rate, strategy="loudness",
|
332 |
loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
|
333 |
-
waveform_video_path = get_waveform(file.name,bg_image=background, bar_count=45, name
|
334 |
# Remove the extension from file.name
|
335 |
file_name_without_extension = os.path.splitext(file.name)[0]
|
336 |
# Get the directory, filename, name, extension, and new extension of the waveform video path
|
337 |
video_dir, video_name, video_name, video_ext, video_new_ext = get_file_parts(waveform_video_path)
|
338 |
|
339 |
-
new_video_path =
|
340 |
-
|
341 |
mp4 = MP4(waveform_video_path)
|
342 |
mp4["©nam"] = title_file_name # Title tag
|
343 |
mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}" # Description tag
|
344 |
|
345 |
commit = commit_hash()
|
346 |
-
metadata={
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
}
|
387 |
# Add additional metadata from the metadata dictionary (if it exists)
|
388 |
for key, value in metadata.items():
|
@@ -392,16 +392,10 @@ def predict(model, text, melody_filepath, duration, dimension, topk, topp, tempe
|
|
392 |
mp4.save()
|
393 |
|
394 |
try:
|
395 |
-
|
396 |
-
|
397 |
-
# Open the original MP4 file in binary read mode and the new file in binary write mode
|
398 |
-
with open(waveform_video_path, "rb") as src, open(new_video_path, "wb") as dst:
|
399 |
-
if os.path.exists(waveform_video_path):
|
400 |
-
# Copy the contents from the source file to the destination file
|
401 |
-
shutil.copyfileobj(src, dst)
|
402 |
-
waveform_video_path = new_video_path
|
403 |
except Exception as e:
|
404 |
-
print(f"Error
|
405 |
|
406 |
if waveform_video_path:
|
407 |
modules.user_history.save_file(
|
@@ -454,7 +448,7 @@ def ui(**kwargs):
|
|
454 |
text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
|
455 |
autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
|
456 |
with gr.Column():
|
457 |
-
duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration")
|
458 |
model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True, key="chosen_model")
|
459 |
with gr.Row():
|
460 |
submit = gr.Button("Generate", elem_id="btn-generate")
|
@@ -479,14 +473,14 @@ def ui(**kwargs):
|
|
479 |
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
|
480 |
with gr.Accordion("Expert", open=False):
|
481 |
with gr.Row():
|
482 |
-
segment_length = gr.Slider(minimum=10, maximum=30, value=30, step
|
483 |
overlap = gr.Slider(minimum=0, maximum=15, value=1, step=1, label="Segment Overlap", interactive=True)
|
484 |
dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
|
485 |
with gr.Row():
|
486 |
topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
|
487 |
topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
|
488 |
-
temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
|
489 |
-
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
|
490 |
with gr.Row():
|
491 |
seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
|
492 |
gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
|
|
|
32 |
import modules.user_history
|
33 |
from modules.version_info import versions_html, commit_hash, get_xformers_version
|
34 |
from modules.gradio import *
|
35 |
+
from modules.file_utils import get_file_parts, get_filename_from_filepath, convert_title_to_filename, get_unique_file_path, delete_file
|
36 |
|
37 |
MODEL = None
|
38 |
MODELS = None
|
|
|
243 |
try:
|
244 |
if melody and ("melody" in model):
|
245 |
# return excess duration, load next model and continue in loop structure building up output_segments
|
246 |
+
if duration > segment_duration:
|
247 |
output_segments, duration = generate_music_segments(text, melody, seed, MODEL, duration, overlap, MODEL.lm.cfg.dataset.segment_duration, prompt_index, harmony_only, progress=gr.Progress(track_tqdm=True))
|
248 |
else:
|
249 |
# pure original code
|
|
|
323 |
output = output.detach().cpu().float()[0]
|
324 |
|
325 |
title_file_name = convert_title_to_filename(title)
|
326 |
+
with NamedTemporaryFile("wb", suffix=".wav", delete=False, prefix=title_file_name) as file:
|
327 |
video_description = f"{text}\n Duration: {str(initial_duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef} overlap: {overlap}\n Seed: {seed}\n Model: {model}\n Melody Condition:{melody_name}\n Sample Segment: {prompt_index}"
|
328 |
if include_settings or include_title:
|
329 |
+
background = add_settings_to_image(title if include_title else "",video_description if include_settings else "",background_path=background,font=settings_font,font_color=settings_font_color)
|
330 |
audio_write(
|
331 |
file.name, output, MODEL.sample_rate, strategy="loudness",
|
332 |
loudness_headroom_db=18, loudness_compressor=True, add_suffix=False, channels=2)
|
333 |
+
waveform_video_path = get_waveform(file.name, bg_image=background, bar_count=45, name=title_file_name)
|
334 |
# Remove the extension from file.name
|
335 |
file_name_without_extension = os.path.splitext(file.name)[0]
|
336 |
# Get the directory, filename, name, extension, and new extension of the waveform video path
|
337 |
video_dir, video_name, video_name, video_ext, video_new_ext = get_file_parts(waveform_video_path)
|
338 |
|
339 |
+
new_video_path = get_unique_file_path(video_dir, title_file_name, video_new_ext)
|
340 |
+
|
341 |
mp4 = MP4(waveform_video_path)
|
342 |
mp4["©nam"] = title_file_name # Title tag
|
343 |
mp4["desc"] = f"{text}\n Duration: {str(initial_duration)}" # Description tag
|
344 |
|
345 |
commit = commit_hash()
|
346 |
+
metadata = {
|
347 |
+
"prompt": text,
|
348 |
+
"negative_prompt": "",
|
349 |
+
"Seed": seed,
|
350 |
+
"steps": 1,
|
351 |
+
"width": "768px",
|
352 |
+
"height": "512px",
|
353 |
+
"Dimension": dimension,
|
354 |
+
"Top-k": topk,
|
355 |
+
"Top-p": topp,
|
356 |
+
"Randomness": temperature,
|
357 |
+
"cfg": cfg_coef,
|
358 |
+
"overlap": overlap,
|
359 |
+
"Melody Condition": melody_name,
|
360 |
+
"Sample Segment": prompt_index,
|
361 |
+
"Duration": initial_duration,
|
362 |
+
"Audio": file.name,
|
363 |
+
"font": settings_font,
|
364 |
+
"font_color": settings_font_color,
|
365 |
+
"harmony_only": harmony_only,
|
366 |
+
"background": background,
|
367 |
+
"include_title": include_title,
|
368 |
+
"include_settings": include_settings,
|
369 |
+
"profile": "Satoshi Nakamoto" if profile.value is None else profile.value.username,
|
370 |
+
"commit": commit_hash(),
|
371 |
+
"tag": git_tag(),
|
372 |
+
"version": gr.__version__,
|
373 |
+
"model_version": MODEL.version,
|
374 |
+
"model_name": MODEL.name,
|
375 |
+
"model_description": f"{MODEL.audio_channels} channels, {MODEL.sample_rate} Hz",
|
376 |
+
"melody_name": melody_name if melody_name else "",
|
377 |
+
"melody_extension": melody_extension if melody_extension else "",
|
378 |
+
"hostname": "https://huggingface.co/spaces/Surn/UnlimitedMusicGen",
|
379 |
+
"version": f"https://huggingface.co/spaces/Surn/UnlimitedMusicGen/commit/{'huggingface' if commit == '<none>' else commit}",
|
380 |
+
"python": sys.version,
|
381 |
+
"torch": getattr(torch, '__long_version__', torch.__version__),
|
382 |
+
"xformers": get_xformers_version(),
|
383 |
+
"gradio": gr.__version__,
|
384 |
+
"huggingface_space": os.environ.get('SPACE_ID', ''),
|
385 |
+
"CUDA": f"{'CUDA is available. device: ' + torch.cuda.get_device_name(0) + ' version: ' + torch.version.cuda if torch.cuda.is_available() else 'CUDA is not available.'}",
|
386 |
}
|
387 |
# Add additional metadata from the metadata dictionary (if it exists)
|
388 |
for key, value in metadata.items():
|
|
|
392 |
mp4.save()
|
393 |
|
394 |
try:
|
395 |
+
os.replace(waveform_video_path, new_video_path)
|
396 |
+
waveform_video_path = new_video_path
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
except Exception as e:
|
398 |
+
print(f"Error renaming file: {e}")
|
399 |
|
400 |
if waveform_video_path:
|
401 |
modules.user_history.save_file(
|
|
|
448 |
text = gr.Text(label="Describe your music", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi, soft fade-in, soft fade-out", key="prompt", lines=4)
|
449 |
autoplay_cb = gr.Checkbox(value=False, label="Autoplay?", key="autoplay_cb")
|
450 |
with gr.Column():
|
451 |
+
duration = gr.Slider(minimum=1, maximum=720, value=10, label="Duration (s)", interactive=True, key="total_duration", step=1)
|
452 |
model = gr.Radio(["melody", "medium", "small", "large", "melody-large", "stereo-small", "stereo-medium", "stereo-large", "stereo-melody", "stereo-melody-large"], label="AI Model", value="medium", interactive=True, key="chosen_model")
|
453 |
with gr.Row():
|
454 |
submit = gr.Button("Generate", elem_id="btn-generate")
|
|
|
473 |
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#c87f05", interactive=True, key="settings_font_color")
|
474 |
with gr.Accordion("Expert", open=False):
|
475 |
with gr.Row():
|
476 |
+
segment_length = gr.Slider(minimum=10, maximum=30, value=30, step=1,label="Music Generation Segment Length (s)", interactive=True,key="segment_length")
|
477 |
overlap = gr.Slider(minimum=0, maximum=15, value=1, step=1, label="Segment Overlap", interactive=True)
|
478 |
dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
|
479 |
with gr.Row():
|
480 |
topk = gr.Number(label="Top-k", value=280, precision=0, interactive=True, info="more structured", key="topk")
|
481 |
topp = gr.Number(label="Top-p", value=1150, precision=0, interactive=True, info="more variation, overwrites Top-k if not zero", key="topp")
|
482 |
+
temperature = gr.Number(label="Randomness Temperature", value=0.7, precision=None, step=0.1, interactive=True, info="less than one to follow Melody Condition song closely", key="temperature")
|
483 |
+
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.75, precision=None, step=0.1, interactive=True, info="3.0-4.0, stereo and small need more", key="cfg_coef")
|
484 |
with gr.Row():
|
485 |
seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True, key="seed")
|
486 |
gr.Button('\U0001f3b2\ufe0f', elem_classes="small-btn").click(fn=lambda: -1, outputs=[seed], queue=False)
|
audiocraft/models/lm.py
CHANGED
@@ -463,7 +463,7 @@ class LMModel(StreamingModule):
|
|
463 |
B, K, T = prompt.shape
|
464 |
start_offset = T
|
465 |
print(f"start_offset: {start_offset} | max_gen_len: {max_gen_len}")
|
466 |
-
assert start_offset
|
467 |
|
468 |
pattern = self.pattern_provider.get_pattern(max_gen_len)
|
469 |
# this token is used as default value for codes that are not generated yet
|
|
|
463 |
B, K, T = prompt.shape
|
464 |
start_offset = T
|
465 |
print(f"start_offset: {start_offset} | max_gen_len: {max_gen_len}")
|
466 |
+
assert start_offset <= max_gen_len
|
467 |
|
468 |
pattern = self.pattern_provider.get_pattern(max_gen_len)
|
469 |
# this token is used as default value for codes that are not generated yet
|