sino
committed on
Commit
·
ed14d60
1
Parent(s):
99f673d
Update README.md
Browse files
README.md
CHANGED
@@ -39,47 +39,18 @@ model = AutoModel.from_pretrained('Tabgac/SpectPrompt', trust_remote_code=True)
|
|
39 |
device = model.device
|
40 |
# sample rate: 16k
|
41 |
music_path = '/path/to/music.wav'
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
# 1. extract logmel spectrogram
|
44 |
-
# 1.1 parameters
|
45 |
-
class FFT_parameters:
|
46 |
-
sample_rate = 16000
|
47 |
-
window_size = 400
|
48 |
-
n_fft = 400
|
49 |
-
hop_size = 160
|
50 |
-
n_mels = 80
|
51 |
-
f_min = 50
|
52 |
-
f_max = 8000
|
53 |
-
prms = FFT_parameters()
|
54 |
-
# 1.2. extract
|
55 |
-
import nnAudio.Spectrogram
|
56 |
-
import librosa
|
57 |
-
to_spec = nnAudio.Spectrogram.MelSpectrogram(
|
58 |
-
sr=prms.sample_rate,
|
59 |
-
n_fft=prms.n_fft,
|
60 |
-
win_length=prms.window_size,
|
61 |
-
hop_length=prms.hop_size,
|
62 |
-
n_mels=prms.n_mels,
|
63 |
-
fmin=prms.f_min,
|
64 |
-
fmax=prms.f_max,
|
65 |
-
center=True,
|
66 |
-
power=2,
|
67 |
-
verbose=False,
|
68 |
-
)
|
69 |
-
wav, ori_sr = librosa.load(music_path, mono=True, sr=prms.sample_rate)
|
70 |
-
lms = to_spec(torch.tensor(wav))
|
71 |
-
lms = (lms + torch.finfo().eps).log().to(device)
|
72 |
-
# 1.3. processing
|
73 |
import os
|
74 |
from torch.nn.utils.rnn import pad_sequence
|
75 |
import random
|
76 |
# get the file transforms.py from https://github.com/taugastcn/SpectPrompt.git
|
77 |
from transforms import Normalize, SpecRandomCrop, SpecPadding, SpecRepeat
|
78 |
-
|
79 |
-
|
80 |
transforms = [ Normalize(-4.5, 4.5), SpecRandomCrop(target_len=2992), SpecPadding(target_len=2992), SpecRepeat() ]
|
81 |
lms = lms.numpy()
|
82 |
-
|
83 |
for trans in transforms:
|
84 |
lms = trans(lms)
|
85 |
|
|
|
39 |
device = model.device
|
40 |
# sample rate: 16k
|
41 |
music_path = '/path/to/music.wav'
|
42 |
+
# 1. get logmel spectrogram
|
43 |
+
# get the file wav_to_mel.py from https://github.com/taugastcn/SpectPrompt.git
|
44 |
+
from wav_to_mel import wav_to_mel
|
45 |
+
lms = wav_to_mel(music_path)
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
import os
|
48 |
from torch.nn.utils.rnn import pad_sequence
|
49 |
import random
|
50 |
# get the file transforms.py from https://github.com/taugastcn/SpectPrompt.git
|
51 |
from transforms import Normalize, SpecRandomCrop, SpecPadding, SpecRepeat
|
|
|
|
|
52 |
transforms = [ Normalize(-4.5, 4.5), SpecRandomCrop(target_len=2992), SpecPadding(target_len=2992), SpecRepeat() ]
|
53 |
lms = lms.numpy()
|
|
|
54 |
for trans in transforms:
|
55 |
lms = trans(lms)
|
56 |
|