Spaces:

Tonic
/

audiocraft

Running on Zero

App Files Community

Tonic committed on Jun 16

Commit

7e452be

verified ·

1 Parent(s): a8787a5

Update main.py

Browse files

Files changed (1) hide show

main.py +71 -73

main.py CHANGED Viewed

@@ -545,79 +545,77 @@ def predict_full(model, text, chords_sym, melody_file,
                 ode_rtol, ode_atol,
                 ode_solver, ode_steps,
                 progress=gr.Progress()):
-   """Generate music using JASCO (Joint Audio-Symbolic Conditioning) model.
-    This function generates two variations of music based on text descriptions, chord progressions,
-    and optional melody and drum inputs. It uses the JASCO model to create high-quality music samples
-    with both global (text) and local (chords, drums, melody) controls.
-    Args:
-        model (str): The JASCO model to use. Options:
-            - 'facebook/jasco-chords-drums-400M': Basic model with chord and drum support (400M parameters)
-            - 'facebook/jasco-chords-drums-1B': Enhanced model with chord and drum support (1B parameters)
-            - 'facebook/jasco-chords-drums-melody-400M': Model with melody support (400M parameters)
-            - 'facebook/jasco-chords-drums-melody-1B': Full-featured model with melody support (1B parameters)
-        text (str): Text description of the desired music. Examples:
-            - "80s pop with groovy synth bass and electric piano"
-            - "Strings, woodwind, orchestral, symphony"
-            - "Jazz quartet with walking bass and smooth piano"
-        chords_sym (str): Chord progression in format "(Chord, Time), (Chord, Time), ...". Time is in seconds (0-10).
-            Example: "(C, 0.0), (D, 2.0), (F, 4.0), (Ab, 6.0), (Bb, 7.0), (C, 8.0)"
-        melody_file (File): Optional. PyTorch tensor file containing melody salience matrix.
-            Shape should be [n_melody_bins, T].
-        drums_file (Audio): Optional. WAV file containing drum patterns (2-4 bars recommended).
-        drums_mic (Audio): Optional. Microphone recording of drum patterns.
-        drum_input_src (str): Source of drum input. Either "file" or "mic".
-        cfg_coef_all (float): Classifier Free Guidance coefficient for overall conditioning.
-            Controls adherence to all input conditions. Range: 1.0-3.0. Default: 1.25.
-        cfg_coef_txt (float): Classifier Free Guidance coefficient for text conditioning.
-            Controls strength of text description matching. Range: 1.0-4.0. Default: 2.5.
-        ode_rtol (float): Relative tolerance for ODE solver. Default: 1e-4.
-        ode_atol (float): Absolute tolerance for ODE solver. Default: 1e-4.
-        ode_solver (str): ODE solver to use. Options:
-            - 'euler': Faster, less accurate
-            - 'dopri5': Slower, more accurate
-        ode_steps (int): Number of steps for euler solver. Default: 10.
-        progress (gr.Progress): Gradio progress bar for tracking generation progress.
-    Returns:
-        tuple: Two WAV file paths containing the generated music variations.
-    Raises:
-        gr.Error: If there are issues with:
-            - Model loading
-            - Invalid melody matrix shape
-            - Generation process
-            - User interruption
-    Notes:
-        - First generation may be slower due to model loading
-        - Subsequent generations with same model are faster
-        - Higher parameter models (1B) require more memory
-        - Melody-enabled models may be slower
-        - The function generates two variations of the music
-        - Each generation is 10 seconds long
-        - Output is provided as WAV files
-    Example:
-        ```python
-        wavs = predict_full(
-            model='facebook/jasco-chords-drums-melody-400M',
-            text="80s pop with groovy synth bass and electric piano",
-            chords_sym="(C, 0.0), (Am, 2.5), (F, 5.0), (G, 7.5)",
-            melody_file=None,
-            drums_file=None,
-            drums_mic=None,
-            drum_input_src="file",
-            cfg_coef_all=1.25,
-            cfg_coef_txt=2.5,
-            ode_rtol=1e-4,
-            ode_atol=1e-4,
-            ode_solver='euler',
-            ode_steps=10
-        )
-        ```
-    """
     global INTERRUPTING
     INTERRUPTING = False
     progress(0, desc="Loading model...")

                 ode_rtol, ode_atol,
                 ode_solver, ode_steps,
                 progress=gr.Progress()):
+               """Generate music using JASCO (Joint Audio-Symbolic Conditioning) model.
+                This function generates two variations of music based on text descriptions, chord progressions,
+                and optional melody and drum inputs. It uses the JASCO model to create high-quality music samples
+                with both global (text) and local (chords, drums, melody) controls.
+                Args:
+                    model (str): The JASCO model to use. Options:
+                        - 'facebook/jasco-chords-drums-400M': Basic model with chord and drum support (400M parameters)
+                        - 'facebook/jasco-chords-drums-1B': Enhanced model with chord and drum support (1B parameters)
+                        - 'facebook/jasco-chords-drums-melody-400M': Model with melody support (400M parameters)
+                        - 'facebook/jasco-chords-drums-melody-1B': Full-featured model with melody support (1B parameters)
+                    text (str): Text description of the desired music. Examples:
+                        - "80s pop with groovy synth bass and electric piano"
+                        - "Strings, woodwind, orchestral, symphony"
+                        - "Jazz quartet with walking bass and smooth piano"
+                    chords_sym (str): Chord progression in format "(Chord, Time), (Chord, Time), ...". Time is in seconds (0-10).
+                        Example: "(C, 0.0), (D, 2.0), (F, 4.0), (Ab, 6.0), (Bb, 7.0), (C, 8.0)"
+                    melody_file (File): Optional. PyTorch tensor file containing melody salience matrix.
+                        Shape should be [n_melody_bins, T].
+                    drums_file (Audio): Optional. WAV file containing drum patterns (2-4 bars recommended).
+                    drums_mic (Audio): Optional. Microphone recording of drum patterns.
+                    drum_input_src (str): Source of drum input. Either "file" or "mic".
+                    cfg_coef_all (float): Classifier Free Guidance coefficient for overall conditioning.
+                        Controls adherence to all input conditions. Range: 1.0-3.0. Default: 1.25.
+                    cfg_coef_txt (float): Classifier Free Guidance coefficient for text conditioning.
+                        Controls strength of text description matching. Range: 1.0-4.0. Default: 2.5.
+                    ode_rtol (float): Relative tolerance for ODE solver. Default: 1e-4.
+                    ode_atol (float): Absolute tolerance for ODE solver. Default: 1e-4.
+                    ode_solver (str): ODE solver to use. Options:
+                        - 'euler': Faster, less accurate
+                        - 'dopri5': Slower, more accurate
+                    ode_steps (int): Number of steps for euler solver. Default: 10.
+                    progress (gr.Progress): Gradio progress bar for tracking generation progress.
+                Returns:
+                    tuple: Two WAV file paths containing the generated music variations.
+                Raises:
+                    gr.Error: If there are issues with:
+                        - Model loading
+                        - Invalid melody matrix shape
+                        - Generation process
+                        - User interruption
+                Notes:
+                    - First generation may be slower due to model loading
+                    - Subsequent generations with same model are faster
+                    - Higher parameter models (1B) require more memory
+                    - Melody-enabled models may be slower
+                    - The function generates two variations of the music
+                    - Each generation is 10 seconds long
+                    - Output is provided as WAV files
+                Example:
+                    wavs = predict_full(
+                        model='facebook/jasco-chords-drums-melody-400M',
+                        text="80s pop with groovy synth bass and electric piano",
+                        chords_sym="(C, 0.0), (Am, 2.5), (F, 5.0), (G, 7.5)",
+                        melody_file=None,
+                        drums_file=None,
+                        drums_mic=None,
+                        drum_input_src="file",
+                        cfg_coef_all=1.25,
+                        cfg_coef_txt=2.5,
+                        ode_rtol=1e-4,
+                        ode_atol=1e-4,
+                        ode_solver='euler',
+                        ode_steps=10
+                    )
+                    """
     global INTERRUPTING
     INTERRUPTING = False
     progress(0, desc="Loading model...")