Spaces:
Running
on
L40S
Running
on
L40S
add
Browse files- README.md +30 -3
- app.py +133 -0
- requirements.txt +11 -0
README.md
CHANGED
@@ -1,13 +1,40 @@
|
|
1 |
---
|
2 |
title: Elastic Musicgen Large
|
3 |
-
emoji:
|
4 |
colorFrom: pink
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.34.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
short_description:
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
title: Elastic Musicgen Large
|
3 |
+
emoji: 🎵
|
4 |
colorFrom: pink
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.34.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
short_description: Generate music from text descriptions using Facebook's MusicGen Large model
|
11 |
---
|
12 |
|
13 |
+
# 🎵 Elastic MusicGen Large
|
14 |
+
|
15 |
+
This Hugging Face Space provides a user-friendly interface for generating music from text descriptions using Facebook's MusicGen Large model with Elastic's optimizations.
|
16 |
+
|
17 |
+
## Features
|
18 |
+
|
19 |
+
- **Text-to-Music Generation**: Convert text descriptions into high-quality audio
|
20 |
+
- **Customizable Parameters**: Control creativity, duration, and generation parameters
|
21 |
+
- **Interactive Interface**: Easy-to-use Gradio interface with real-time controls
|
22 |
+
- **Example Prompts**: Pre-loaded examples to get you started
|
23 |
+
|
24 |
+
## How to Use
|
25 |
+
|
26 |
+
1. Enter a detailed description of the music you want to generate
|
27 |
+
2. Adjust the duration (5-30 seconds) and creativity settings
|
28 |
+
3. Click "Generate Music" and wait for your audio to be created
|
29 |
+
4. Listen to and download your generated music
|
30 |
+
|
31 |
+
## Tips for Better Results
|
32 |
+
|
33 |
+
- Be specific in your descriptions (e.g., "slow blues guitar with harmonica")
|
34 |
+
- Higher temperature values create more creative/unpredictable results
|
35 |
+
- Lower temperature values produce more consistent, predictable music
|
36 |
+
- Try different combinations of instruments and genres
|
37 |
+
|
38 |
+
---
|
39 |
+
|
40 |
+
*Powered by Facebook's MusicGen Large model with Elastic optimizations*
|
app.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
4 |
+
import scipy.io.wavfile
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
# Load model and processor
|
8 |
+
@gr.cache()
|
9 |
+
def load_model():
|
10 |
+
"""Load the musicgen model and processor"""
|
11 |
+
processor = AutoProcessor.from_pretrained("facebook/musicgen-large")
|
12 |
+
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-large")
|
13 |
+
return processor, model
|
14 |
+
|
15 |
+
def generate_music(text_prompt, duration=10, temperature=1.0, top_k=250, top_p=0.0):
|
16 |
+
"""Generate music based on text prompt"""
|
17 |
+
try:
|
18 |
+
processor, model = load_model()
|
19 |
+
|
20 |
+
# Process the text prompt
|
21 |
+
inputs = processor(
|
22 |
+
text=[text_prompt],
|
23 |
+
padding=True,
|
24 |
+
return_tensors="pt",
|
25 |
+
)
|
26 |
+
|
27 |
+
# Generate audio
|
28 |
+
with torch.no_grad():
|
29 |
+
audio_values = model.generate(
|
30 |
+
**inputs,
|
31 |
+
max_new_tokens=duration * 50, # Approximate tokens per second
|
32 |
+
do_sample=True,
|
33 |
+
temperature=temperature,
|
34 |
+
top_k=top_k,
|
35 |
+
top_p=top_p,
|
36 |
+
)
|
37 |
+
|
38 |
+
# Convert to numpy array and prepare for output
|
39 |
+
audio_data = audio_values[0, 0].cpu().numpy()
|
40 |
+
sample_rate = model.config.sample_rate
|
41 |
+
|
42 |
+
# Normalize audio
|
43 |
+
audio_data = audio_data / np.max(np.abs(audio_data))
|
44 |
+
|
45 |
+
return sample_rate, audio_data
|
46 |
+
|
47 |
+
except Exception as e:
|
48 |
+
return None, f"Error generating music: {str(e)}"
|
49 |
+
|
50 |
+
# Create Gradio interface
|
51 |
+
with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
|
52 |
+
gr.Markdown("# π΅ MusicGen Large Music Generator")
|
53 |
+
gr.Markdown("Generate music from text descriptions using Facebook's MusicGen Large model.")
|
54 |
+
|
55 |
+
with gr.Row():
|
56 |
+
with gr.Column():
|
57 |
+
text_input = gr.Textbox(
|
58 |
+
label="Music Description",
|
59 |
+
placeholder="Enter a description of the music you want to generate (e.g., 'upbeat jazz with piano and drums')",
|
60 |
+
lines=3
|
61 |
+
)
|
62 |
+
|
63 |
+
with gr.Row():
|
64 |
+
duration = gr.Slider(
|
65 |
+
minimum=5,
|
66 |
+
maximum=30,
|
67 |
+
value=10,
|
68 |
+
step=1,
|
69 |
+
label="Duration (seconds)"
|
70 |
+
)
|
71 |
+
temperature = gr.Slider(
|
72 |
+
minimum=0.1,
|
73 |
+
maximum=2.0,
|
74 |
+
value=1.0,
|
75 |
+
step=0.1,
|
76 |
+
label="Temperature (creativity)"
|
77 |
+
)
|
78 |
+
|
79 |
+
with gr.Row():
|
80 |
+
top_k = gr.Slider(
|
81 |
+
minimum=1,
|
82 |
+
maximum=500,
|
83 |
+
value=250,
|
84 |
+
step=1,
|
85 |
+
label="Top-k"
|
86 |
+
)
|
87 |
+
top_p = gr.Slider(
|
88 |
+
minimum=0.0,
|
89 |
+
maximum=1.0,
|
90 |
+
value=0.0,
|
91 |
+
step=0.1,
|
92 |
+
label="Top-p"
|
93 |
+
)
|
94 |
+
|
95 |
+
generate_btn = gr.Button("π΅ Generate Music", variant="primary")
|
96 |
+
|
97 |
+
with gr.Column():
|
98 |
+
audio_output = gr.Audio(
|
99 |
+
label="Generated Music",
|
100 |
+
type="numpy"
|
101 |
+
)
|
102 |
+
|
103 |
+
gr.Markdown("### Tips:")
|
104 |
+
gr.Markdown("""
|
105 |
+
- Be specific in your descriptions (e.g., "slow blues guitar with harmonica")
|
106 |
+
- Higher temperature = more creative/random results
|
107 |
+
- Lower temperature = more predictable results
|
108 |
+
- Duration is limited to 30 seconds for faster generation
|
109 |
+
""")
|
110 |
+
|
111 |
+
# Example prompts
|
112 |
+
gr.Examples(
|
113 |
+
examples=[
|
114 |
+
["upbeat jazz with piano and drums"],
|
115 |
+
["relaxing acoustic guitar melody"],
|
116 |
+
["electronic dance music with heavy bass"],
|
117 |
+
["classical violin concerto"],
|
118 |
+
["reggae with steel drums and bass"],
|
119 |
+
["rock ballad with electric guitar solo"],
|
120 |
+
],
|
121 |
+
inputs=text_input,
|
122 |
+
label="Example Prompts"
|
123 |
+
)
|
124 |
+
|
125 |
+
# Connect the generate button to the function
|
126 |
+
generate_btn.click(
|
127 |
+
fn=generate_music,
|
128 |
+
inputs=[text_input, duration, temperature, top_k, top_p],
|
129 |
+
outputs=audio_output
|
130 |
+
)
|
131 |
+
|
132 |
+
if __name__ == "__main__":
|
133 |
+
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--index-url https://thestage.jfrog.io/artifactory/api/pypi/pypi-thestage-ai-production/simple
|
2 |
+
--extra-index-url https://pypi.nvidia.com
|
3 |
+
--extra-index-url https://pypi.org/simple
|
4 |
+
|
5 |
+
thestage
|
6 |
+
elastic_models[nvidia]
|
7 |
+
flash-attn==2.7.3
|
8 |
+
torch
|
9 |
+
scipy
|
10 |
+
transformers
|
11 |
+
gradio
|