File size: 2,697 Bytes
bc62b2b
 
fe49032
bc62b2b
 
 
 
0bb0ef0
bc62b2b
 
 
fe49032
bc62b2b
 
fe49032
 
 
 
bc62b2b
0bb0ef0
bc62b2b
 
0bb0ef0
 
 
bc62b2b
 
 
fe49032
bc62b2b
 
 
 
 
 
0bb0ef0
 
fe49032
 
bc62b2b
fe49032
 
0bb0ef0
 
 
 
 
 
bc62b2b
 
 
 
 
 
 
 
 
 
0bb0ef0
 
fe49032
0bb0ef0
 
bc62b2b
 
 
fe49032
 
bc62b2b
0bb0ef0
bc62b2b
 
fe49032
0bb0ef0
 
 
 
 
 
 
 
 
 
 
 
fe49032
 
0bb0ef0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from audiocraft.models import MusicGen
import streamlit as st
import os
import torch
import torchaudio
import numpy as np
import base64
from io import BytesIO

@st.cache_resource
def load_model():
    """Return the pretrained ``facebook/musicgen-small`` MusicGen model.

    The function is decorated with ``st.cache_resource``; presumably this
    keeps one loaded model alive across Streamlit reruns instead of
    reloading the checkpoint each time (confirm against the Streamlit docs).
    """
    return MusicGen.get_pretrained("facebook/musicgen-small")

def generate_music_tensors(description, duration: int):
    """Generate audio for a text prompt with the cached MusicGen model.

    Args:
        description: Text prompt handed to the model as a one-element batch.
        duration: Requested clip length, forwarded as the ``duration``
            generation parameter.

    Returns:
        Element 0 of what ``model.generate(..., return_tokens=True)`` returns
        (presumably the waveform tensor, with the tokens discarded; confirm
        against the audiocraft API).
    """
    # Echo the request to stdout so it shows up in the server log.
    print("Description:", description)
    print("Duration:", duration)

    musicgen = load_model()

    # Sampling configuration: top-k / nucleus sampling with a slightly
    # lowered temperature.
    sampling_config = {
        "use_sampling": True,
        "top_k": 300,
        "top_p": 0.85,
        "temperature": 0.8,
        "duration": duration,
    }
    musicgen.set_generation_params(**sampling_config)

    generated = musicgen.generate(
        descriptions=[description],
        progress=True,
        return_tokens=True,
    )
    return generated[0]

def save_audio_to_bytes(samples: torch.Tensor, sample_rate: int = 32000) -> BytesIO:
    """Encode an audio tensor as WAV into an in-memory buffer.

    Args:
        samples: A 2-D or 3-D audio tensor. A 3-D tensor is treated as a
            batch and must contain exactly one clip, since one WAV file
            cannot hold several clips. The 2-D tensor handed to
            ``torchaudio.save`` is assumed to be laid out as
            (channels, time) -- confirm against the producer.
        sample_rate: Sampling rate passed to ``torchaudio.save``. Defaults
            to 32000, the value that was previously hard-coded.

    Returns:
        A ``BytesIO`` holding the WAV data, rewound to position 0.

    Raises:
        ValueError: If ``samples`` is not 2-D or 3-D, or if a 3-D tensor
            holds a number of clips other than one.
    """
    # Explicit validation instead of ``assert`` so the check survives ``-O``.
    if samples.dim() not in (2, 3):
        raise ValueError(
            f"expected a 2-D or 3-D audio tensor, got {samples.dim()}-D"
        )
    samples = samples.detach().cpu()

    if samples.dim() == 3:
        if samples.shape[0] != 1:
            raise ValueError(
                "cannot write a batch of "
                f"{samples.shape[0]} clips into a single WAV file"
            )
        # torchaudio.save only accepts a 2-D tensor, so drop the batch axis
        # rather than adding one.
        samples = samples[0]

    # Save audio to a byte buffer instead of a file for easier download.
    byte_io = BytesIO()
    torchaudio.save(byte_io, samples, sample_rate=sample_rate, format="wav")
    byte_io.seek(0)  # Rewind so consumers read from the start.
    return byte_io

# Browser-tab title and icon for the app. This runs at module level, so it
# executes before main() issues any other Streamlit call.
st.set_page_config(page_title="Music Gen", page_icon=":musical_note:")

def main():
    """Render the page: collect a prompt and duration, then generate audio.

    Shows the title, an explanation expander, a description text area and a
    duration slider (2-20 seconds, default 10). Once a non-blank description
    is entered, it echoes the request, calls ``generate_music_tensors``,
    encodes the result with ``save_audio_to_bytes`` and offers the WAV both
    as an inline player and as a download.
    """
    st.title("Your Music")

    with st.expander("See Explanation"):
        st.write("App is developed using Meta's Audiocraft Music Gen model. Write a description and we will generate audio.")

    text_area = st.text_area("Enter description")
    time_slider = st.slider("Select time duration (seconds)", 2, 20, 10)

    # The slider's minimum is 2, so only the description can be missing.
    # Whitespace-only input is treated as empty.
    if not text_area.strip():
        return

    # The original nested st.write(...) inside the st.json(...) call without
    # a separator, which is a syntax error; they are two separate calls.
    st.json(
        {
            "Description": text_area,
            "Selected duration": time_slider,
        }
    )
    st.write("We will back with your music....please enjoy doing the rest of your tasks while we come back in some time :)")

    st.subheader("Generated Music")
    music_tensors = generate_music_tensors(text_area, time_slider)

    # Encode once and hand the same immutable bytes to both widgets, so
    # neither depends on the buffer's read position.
    audio_file = save_audio_to_bytes(music_tensors)
    audio_bytes = audio_file.getvalue()

    # Play and download audio
    st.audio(audio_bytes, format="audio/wav")
    st.download_button(
        label="Download Audio",
        data=audio_bytes,
        file_name="generated_music.wav",
        mime="audio/wav",
    )

# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()