Tonic committed on
Commit
7e452be
·
verified ·
1 Parent(s): a8787a5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +71 -73
main.py CHANGED
@@ -545,79 +545,77 @@ def predict_full(model, text, chords_sym, melody_file,
545
  ode_rtol, ode_atol,
546
  ode_solver, ode_steps,
547
  progress=gr.Progress()):
548
- """Generate music using JASCO (Joint Audio-Symbolic Conditioning) model.
549
-
550
- This function generates two variations of music based on text descriptions, chord progressions,
551
- and optional melody and drum inputs. It uses the JASCO model to create high-quality music samples
552
- with both global (text) and local (chords, drums, melody) controls.
553
-
554
- Args:
555
- model (str): The JASCO model to use. Options:
556
- - 'facebook/jasco-chords-drums-400M': Basic model with chord and drum support (400M parameters)
557
- - 'facebook/jasco-chords-drums-1B': Enhanced model with chord and drum support (1B parameters)
558
- - 'facebook/jasco-chords-drums-melody-400M': Model with melody support (400M parameters)
559
- - 'facebook/jasco-chords-drums-melody-1B': Full-featured model with melody support (1B parameters)
560
- text (str): Text description of the desired music. Examples:
561
- - "80s pop with groovy synth bass and electric piano"
562
- - "Strings, woodwind, orchestral, symphony"
563
- - "Jazz quartet with walking bass and smooth piano"
564
- chords_sym (str): Chord progression in format "(Chord, Time), (Chord, Time), ...". Time is in seconds (0-10).
565
- Example: "(C, 0.0), (D, 2.0), (F, 4.0), (Ab, 6.0), (Bb, 7.0), (C, 8.0)"
566
- melody_file (File): Optional. PyTorch tensor file containing melody salience matrix.
567
- Shape should be [n_melody_bins, T].
568
- drums_file (Audio): Optional. WAV file containing drum patterns (2-4 bars recommended).
569
- drums_mic (Audio): Optional. Microphone recording of drum patterns.
570
- drum_input_src (str): Source of drum input. Either "file" or "mic".
571
- cfg_coef_all (float): Classifier Free Guidance coefficient for overall conditioning.
572
- Controls adherence to all input conditions. Range: 1.0-3.0. Default: 1.25.
573
- cfg_coef_txt (float): Classifier Free Guidance coefficient for text conditioning.
574
- Controls strength of text description matching. Range: 1.0-4.0. Default: 2.5.
575
- ode_rtol (float): Relative tolerance for ODE solver. Default: 1e-4.
576
- ode_atol (float): Absolute tolerance for ODE solver. Default: 1e-4.
577
- ode_solver (str): ODE solver to use. Options:
578
- - 'euler': Faster, less accurate
579
- - 'dopri5': Slower, more accurate
580
- ode_steps (int): Number of steps for euler solver. Default: 10.
581
- progress (gr.Progress): Gradio progress bar for tracking generation progress.
582
-
583
- Returns:
584
- tuple: Two WAV file paths containing the generated music variations.
585
-
586
- Raises:
587
- gr.Error: If there are issues with:
588
- - Model loading
589
- - Invalid melody matrix shape
590
- - Generation process
591
- - User interruption
592
-
593
- Notes:
594
- - First generation may be slower due to model loading
595
- - Subsequent generations with same model are faster
596
- - Higher parameter models (1B) require more memory
597
- - Melody-enabled models may be slower
598
- - The function generates two variations of the music
599
- - Each generation is 10 seconds long
600
- - Output is provided as WAV files
601
-
602
- Example:
603
- ```python
604
- wavs = predict_full(
605
- model='facebook/jasco-chords-drums-melody-400M',
606
- text="80s pop with groovy synth bass and electric piano",
607
- chords_sym="(C, 0.0), (Am, 2.5), (F, 5.0), (G, 7.5)",
608
- melody_file=None,
609
- drums_file=None,
610
- drums_mic=None,
611
- drum_input_src="file",
612
- cfg_coef_all=1.25,
613
- cfg_coef_txt=2.5,
614
- ode_rtol=1e-4,
615
- ode_atol=1e-4,
616
- ode_solver='euler',
617
- ode_steps=10
618
- )
619
- ```
620
- """
621
  global INTERRUPTING
622
  INTERRUPTING = False
623
  progress(0, desc="Loading model...")
 
545
  ode_rtol, ode_atol,
546
  ode_solver, ode_steps,
547
  progress=gr.Progress()):
548
+ """Generate music using JASCO (Joint Audio-Symbolic Conditioning) model.
549
+
550
+ This function generates two variations of music based on text descriptions, chord progressions,
551
+ and optional melody and drum inputs. It uses the JASCO model to create high-quality music samples
552
+ with both global (text) and local (chords, drums, melody) controls.
553
+
554
+ Args:
555
+ model (str): The JASCO model to use. Options:
556
+ - 'facebook/jasco-chords-drums-400M': Basic model with chord and drum support (400M parameters)
557
+ - 'facebook/jasco-chords-drums-1B': Enhanced model with chord and drum support (1B parameters)
558
+ - 'facebook/jasco-chords-drums-melody-400M': Model with melody support (400M parameters)
559
+ - 'facebook/jasco-chords-drums-melody-1B': Full-featured model with melody support (1B parameters)
560
+ text (str): Text description of the desired music. Examples:
561
+ - "80s pop with groovy synth bass and electric piano"
562
+ - "Strings, woodwind, orchestral, symphony"
563
+ - "Jazz quartet with walking bass and smooth piano"
564
+ chords_sym (str): Chord progression in format "(Chord, Time), (Chord, Time), ...". Time is in seconds (0-10).
565
+ Example: "(C, 0.0), (D, 2.0), (F, 4.0), (Ab, 6.0), (Bb, 7.0), (C, 8.0)"
566
+ melody_file (File): Optional. PyTorch tensor file containing melody salience matrix.
567
+ Shape should be [n_melody_bins, T].
568
+ drums_file (Audio): Optional. WAV file containing drum patterns (2-4 bars recommended).
569
+ drums_mic (Audio): Optional. Microphone recording of drum patterns.
570
+ drum_input_src (str): Source of drum input. Either "file" or "mic".
571
+ cfg_coef_all (float): Classifier Free Guidance coefficient for overall conditioning.
572
+ Controls adherence to all input conditions. Range: 1.0-3.0. Default: 1.25.
573
+ cfg_coef_txt (float): Classifier Free Guidance coefficient for text conditioning.
574
+ Controls strength of text description matching. Range: 1.0-4.0. Default: 2.5.
575
+ ode_rtol (float): Relative tolerance for ODE solver. Default: 1e-4.
576
+ ode_atol (float): Absolute tolerance for ODE solver. Default: 1e-4.
577
+ ode_solver (str): ODE solver to use. Options:
578
+ - 'euler': Faster, less accurate
579
+ - 'dopri5': Slower, more accurate
580
+ ode_steps (int): Number of steps for euler solver. Default: 10.
581
+ progress (gr.Progress): Gradio progress bar for tracking generation progress.
582
+
583
+ Returns:
584
+ tuple: Two WAV file paths containing the generated music variations.
585
+
586
+ Raises:
587
+ gr.Error: If there are issues with:
588
+ - Model loading
589
+ - Invalid melody matrix shape
590
+ - Generation process
591
+ - User interruption
592
+
593
+ Notes:
594
+ - First generation may be slower due to model loading
595
+ - Subsequent generations with same model are faster
596
+ - Higher parameter models (1B) require more memory
597
+ - Melody-enabled models may be slower
598
+ - The function generates two variations of the music
599
+ - Each generation is 10 seconds long
600
+ - Output is provided as WAV files
601
+
602
+ Example:
603
+ wavs = predict_full(
604
+ model='facebook/jasco-chords-drums-melody-400M',
605
+ text="80s pop with groovy synth bass and electric piano",
606
+ chords_sym="(C, 0.0), (Am, 2.5), (F, 5.0), (G, 7.5)",
607
+ melody_file=None,
608
+ drums_file=None,
609
+ drums_mic=None,
610
+ drum_input_src="file",
611
+ cfg_coef_all=1.25,
612
+ cfg_coef_txt=2.5,
613
+ ode_rtol=1e-4,
614
+ ode_atol=1e-4,
615
+ ode_solver='euler',
616
+ ode_steps=10
617
+ )
618
+ """
 
 
619
  global INTERRUPTING
620
  INTERRUPTING = False
621
  progress(0, desc="Loading model...")