Spaces:
Running
on
Zero
Running
on
Zero
Update main.py
Browse files
main.py
CHANGED
@@ -545,79 +545,77 @@ def predict_full(model, text, chords_sym, melody_file,
|
|
545 |
ode_rtol, ode_atol,
|
546 |
ode_solver, ode_steps,
|
547 |
progress=gr.Progress()):
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
```
|
620 |
-
"""
|
621 |
global INTERRUPTING
|
622 |
INTERRUPTING = False
|
623 |
progress(0, desc="Loading model...")
|
|
|
545 |
ode_rtol, ode_atol,
|
546 |
ode_solver, ode_steps,
|
547 |
progress=gr.Progress()):
|
548 |
+
"""Generate music using the JASCO (Joint Audio and Symbolic Conditioning) model.
|
549 |
+
|
550 |
+
This function generates two variations of music based on text descriptions, chord progressions,
|
551 |
+
and optional melody and drum inputs. It uses the JASCO model to create high-quality music samples
|
552 |
+
with both global (text) and local (chords, drums, melody) controls.
|
553 |
+
|
554 |
+
Args:
|
555 |
+
model (str): The JASCO model to use. Options:
|
556 |
+
- 'facebook/jasco-chords-drums-400M': Basic model with chord and drum support (400M parameters)
|
557 |
+
- 'facebook/jasco-chords-drums-1B': Enhanced model with chord and drum support (1B parameters)
|
558 |
+
- 'facebook/jasco-chords-drums-melody-400M': Model with melody support (400M parameters)
|
559 |
+
- 'facebook/jasco-chords-drums-melody-1B': Full-featured model with melody support (1B parameters)
|
560 |
+
text (str): Text description of the desired music. Examples:
|
561 |
+
- "80s pop with groovy synth bass and electric piano"
|
562 |
+
- "Strings, woodwind, orchestral, symphony"
|
563 |
+
- "Jazz quartet with walking bass and smooth piano"
|
564 |
+
chords_sym (str): Chord progression in format "(Chord, Time), (Chord, Time), ...". Time is in seconds (0-10).
|
565 |
+
Example: "(C, 0.0), (D, 2.0), (F, 4.0), (Ab, 6.0), (Bb, 7.0), (C, 8.0)"
|
566 |
+
melody_file (File): Optional. PyTorch tensor file containing the melody salience matrix.
|
567 |
+
Shape should be [n_melody_bins, T].
|
568 |
+
drums_file (Audio): Optional. WAV file containing drum patterns (2-4 bars recommended).
|
569 |
+
drums_mic (Audio): Optional. Microphone recording of drum patterns.
|
570 |
+
drum_input_src (str): Source of drum input. Either "file" or "mic".
|
571 |
+
cfg_coef_all (float): Classifier Free Guidance coefficient for overall conditioning.
|
572 |
+
Controls adherence to all input conditions. Range: 1.0-3.0. Default: 1.25.
|
573 |
+
cfg_coef_txt (float): Classifier Free Guidance coefficient for text conditioning.
|
574 |
+
Controls strength of text description matching. Range: 1.0-4.0. Default: 2.5.
|
575 |
+
ode_rtol (float): Relative tolerance for ODE solver. Default: 1e-4.
|
576 |
+
ode_atol (float): Absolute tolerance for ODE solver. Default: 1e-4.
|
577 |
+
ode_solver (str): ODE solver to use. Options:
|
578 |
+
- 'euler': Faster, less accurate
|
579 |
+
- 'dopri5': Slower, more accurate
|
580 |
+
ode_steps (int): Number of steps for the 'euler' solver. Default: 10.
|
581 |
+
progress (gr.Progress): Gradio progress bar for tracking generation progress.
|
582 |
+
|
583 |
+
Returns:
|
584 |
+
tuple: Paths to two WAV files containing the generated music variations.
|
585 |
+
|
586 |
+
Raises:
|
587 |
+
gr.Error: If there are issues with:
|
588 |
+
- Model loading
|
589 |
+
- Invalid melody matrix shape
|
590 |
+
- Generation process
|
591 |
+
- User interruption
|
592 |
+
|
593 |
+
Notes:
|
594 |
+
- First generation may be slower due to model loading
|
595 |
+
- Subsequent generations with same model are faster
|
596 |
+
- Higher parameter models (1B) require more memory
|
597 |
+
- Melody-enabled models may be slower
|
598 |
+
- The function generates two variations of the music
|
599 |
+
- Each generation is 10 seconds long
|
600 |
+
- Output is provided as WAV files
|
601 |
+
|
602 |
+
Example:
|
603 |
+
wavs = predict_full(
|
604 |
+
model='facebook/jasco-chords-drums-melody-400M',
|
605 |
+
text="80s pop with groovy synth bass and electric piano",
|
606 |
+
chords_sym="(C, 0.0), (Am, 2.5), (F, 5.0), (G, 7.5)",
|
607 |
+
melody_file=None,
|
608 |
+
drums_file=None,
|
609 |
+
drums_mic=None,
|
610 |
+
drum_input_src="file",
|
611 |
+
cfg_coef_all=1.25,
|
612 |
+
cfg_coef_txt=2.5,
|
613 |
+
ode_rtol=1e-4,
|
614 |
+
ode_atol=1e-4,
|
615 |
+
ode_solver='euler',
|
616 |
+
ode_steps=10
|
617 |
+
)
|
618 |
+
"""
|
|
|
|
|
619 |
global INTERRUPTING
|
620 |
INTERRUPTING = False
|
621 |
progress(0, desc="Loading model...")
|