Kvikontent committed on
Commit
200e88e
·
verified ·
1 Parent(s): 690daad

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from einops import rearrange
4
+ from stable_audio_tools import get_pretrained_model
5
+ from stable_audio_tools.inference.generation import generate_diffusion_cond
6
+
7
+ import spaces
8
+
9
# Fetch the pretrained Stable Audio Open checkpoint together with its config.
model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")

# Both values are read from the model's own configuration and reused below
# for generation (sample_size) and for writing the WAV file (sample_rate).
sample_rate, sample_size = (
    model_config["sample_rate"],
    model_config["sample_size"],
)

# Run on CUDA when torch reports it as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)
16
+
17
+ # --- Gradio App ---
18
+
19
def generate_music(prompt, seconds_total, bpm, genre):
    """Generate audio from a text prompt with Stable Audio Open.

    The text conditioning is built as ``"{bpm} BPM {genre} {prompt}"`` and the
    requested duration is passed to the model as timing conditioning.

    Args:
        prompt: Free-text description of the desired audio.
        seconds_total: Requested clip duration, forwarded as ``seconds_total``
            in the conditioning dictionary.
        bpm: Tempo that is prepended to the text prompt.
        genre: Genre label that is prepended to the text prompt.

    Returns:
        A CPU ``torch.int16`` tensor produced by the ``"b d n -> d (b n)"``
        rearrangement (presumably channels x samples, which is the layout
        ``torchaudio.save`` is later called with), peak-normalized to the
        int16 range.
    """
    # Text and timing conditioning for a single clip.
    conditioning = [{
        "prompt": f"{bpm} BPM {genre} {prompt}",
        "seconds_start": 0,
        "seconds_total": seconds_total,
    }]

    # NOTE(review): the sampler is always asked for `sample_size` samples, so
    # the returned clip has the same length whatever `seconds_total` is;
    # trimming to the requested duration may be desirable -- confirm.
    output = generate_diffusion_cond(
        model,
        steps=100,
        cfg_scale=7,
        conditioning=conditioning,
        sample_size=sample_size,
        sigma_min=0.3,
        sigma_max=500,
        sampler_type="dpmpp-3m-sde",
        device=device,
    )

    # Fold the batch dimension into the sample axis to get one sequence.
    audio = rearrange(output, "b d n -> d (b n)").to(torch.float32)

    # Peak-normalize. The peak is floored at a tiny positive value so that an
    # all-zero (silent) result yields zeros instead of 0/0 = NaN, which would
    # turn into garbage when cast to int16.
    peak = audio.abs().max().clamp_min(1e-8)

    # Clip to [-1, 1], scale to the int16 range and move to the CPU.
    return audio.div(peak).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
48
+
49
@spaces.GPU(duration=120)
def generate_music_and_save(prompt, seconds_total, bpm, genre):
    """Generate music, write it to a WAV file, and return the file path.

    Args:
        prompt: Free-text description of the desired audio.
        seconds_total: Requested clip duration in seconds.
        bpm: Tempo forwarded to ``generate_music``.
        genre: Genre label forwarded to ``generate_music``.

    Returns:
        Path (``str``) of the WAV file that was written.
    """
    # Imported locally so this function does not depend on edits elsewhere.
    import tempfile

    output = generate_music(prompt, seconds_total, bpm, genre)

    # Use a unique file per request. A fixed "output.wav" is shared by every
    # caller, so two overlapping requests could overwrite each other's audio
    # before it is read back. The handle is closed before saving so that the
    # path can be reopened for writing on any platform; delete=False keeps the
    # file on disk for the caller to read.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        filename = handle.name

    torchaudio.save(filename, output, sample_rate)
    return filename
57
+
58
# Build the web interface and start serving it.
#
# NOTE(review): Interface, Textbox, Slider, Dropdown and Audio are looked up on
# the `spaces` module here. These look like Gradio components and `spaces` is
# not known to export them -- presumably `gradio` was intended; confirm and
# switch the namespace (this requires importing gradio at the top of the file).
prompt_input = spaces.Textbox(label="Prompt (e.g., 'upbeat drum loop')", lines=1)
duration_input = spaces.Slider(
    label="Duration (seconds)", minimum=1, maximum=60, step=1
)
bpm_input = spaces.Slider(label="BPM", minimum=60, maximum=200, step=1)
genre_input = spaces.Dropdown(
    label="Genre",
    choices=["pop", "rock", "hip hop", "electronic", "classical"],
    value="pop",
)
audio_output = spaces.Audio(label="Generated Music")

# Input order must match the parameter order of generate_music_and_save:
# (prompt, seconds_total, bpm, genre).
iface = spaces.Interface(
    generate_music_and_save,
    inputs=[prompt_input, duration_input, bpm_input, genre_input],
    outputs=[audio_output],
    title="Stable Audio Open",
    description="Generate music from text prompts using Stable Audio.",
)

iface.launch(share=True)