Enutrof committed on
Commit
9cc6970
·
1 Parent(s): 116a2a9

Added initial files

Browse files
Files changed (4) hide show
  1. app.py +8 -0
  2. gtzan10_lstm_0.7179_l_1.12.h5 +3 -0
  3. inference.py +60 -0
  4. requirements.txt +102 -0
app.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from inference import *
3
+
4
+ def greet(name):
5
+ return "Hello " + name + "!"
6
+
7
+ iface = gr.Interface(fn=inference, inputs="audio", outputs="text")
8
+ iface.launch()
gtzan10_lstm_0.7179_l_1.12.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:810bee018dd749eeb44e51f54435a96a813ce36721f24126687f061232a9e8bb
3
+ size 19417544
inference.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math, librosa
2
+ import numpy as np
3
+
4
+ from tensorflow import keras
5
+
6
+ SAMPLE_RATE = 22050
7
def extract_mfcc_batch(file_path, n_mfcc=13, n_fft=1024, hop_length=512, length_segment=10):
    """Split an audio file into fixed-length segments and extract an MFCC batch.

    MFCC - Mel Frequency Cepstrum Coefficients.

    Args:
        file_path: path to an audio file readable by librosa.
        n_mfcc: number of MFCC coefficients per frame.
        n_fft: FFT window size in samples.
        hop_length: hop between successive frames, in samples.
        length_segment: segment duration in seconds.

    Returns:
        A list of segments, each a (frames, n_mfcc) nested list. Segments
        that do not yield the expected number of frames are dropped.
    """
    mfcc_batch = []
    # Was hard-coded to 220500 (10 s * 22050 Hz), which silently ignored
    # the length_segment parameter; derive it from the arguments instead.
    num_samples_per_segment = length_segment * SAMPLE_RATE
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

    duration = librosa.get_duration(y=signal, sr=sr)
    num_segments = int(duration / length_segment)
    # +1 so a trailing partial segment is attempted; it is filtered out
    # below when it does not produce the expected number of frames.
    for s in range(num_segments + 1):
        start_sample = num_samples_per_segment * s
        finish_sample = start_sample + num_samples_per_segment
        try:
            # Keyword `y=` — the positional audio argument is deprecated
            # and removed in librosa >= 0.10.
            mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                        sr=SAMPLE_RATE,
                                        n_fft=n_fft,
                                        n_mfcc=n_mfcc,
                                        hop_length=hop_length)
        except Exception:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt); skip segments librosa cannot process.
            continue
        mfcc = mfcc.T  # -> (frames, n_mfcc)
        # Was hard-coded `== 431` (the value for the default parameters);
        # compare against the derived count so other settings still work.
        if len(mfcc) == expected_num_mfcc_vectors_per_segment:
            mfcc_batch.append(mfcc.tolist())
    return mfcc_batch
39
+
40
def inference(filename, model_path='gtzan10_lstm_0.7179_l_1.12.h5'):
    """Predict the music genre of an audio file with the pretrained LSTM.

    The file is split into segments, each segment is classified, and the
    majority genre across segments is reported.

    Args:
        filename: path to the audio file to classify.
        model_path: path to the saved Keras model (.h5).

    Returns:
        A string "Genre: <name>, Confidence: <fraction of segments voting
        for that genre>".
    """
    model = keras.models.load_model(model_path)
    mapping = ['blues',
               'classical',
               'country',
               'disco',
               'hiphop',
               'jazz',
               'metal',
               'pop',
               'reggae',
               'rock']
    mfcc_batch = extract_mfcc_batch(filename)
    # Guard against clips too short to yield a single full segment; the
    # original crashed inside model.predict on an empty batch.
    if not mfcc_batch:
        return "Genre: unknown, Confidence: 0.0"
    # extract_mfcc_batch returns nested lists; predict on a proper ndarray.
    pred = model.predict(np.array(mfcc_batch))
    genres = [mapping[i] for i in np.argmax(pred, axis=1)]

    # Majority vote across segments. The original keyed a dict on the vote
    # count (`counter_[genre.count(i)] = i`), so genres with tied counts
    # silently overwrote each other.
    top_genre = max(set(genres), key=genres.count)
    confidence = genres.count(top_genre) / len(genres)
    return f"Genre: {top_genre}, Confidence: {confidence}"
requirements.txt ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.0.0
2
+ aiohttp==3.8.1
3
+ aiosignal==1.2.0
4
+ analytics-python==1.4.0
5
+ anyio==3.5.0
6
+ appdirs==1.4.4
7
+ asgiref==3.5.0
8
+ astunparse==1.6.3
9
+ async-timeout==4.0.2
10
+ attrs==21.4.0
11
+ audioread==2.1.9
12
+ backoff==1.10.0
13
+ bcrypt==3.2.0
14
+ cachetools==5.0.0
15
+ certifi==2021.10.8
16
+ cffi==1.15.0
17
+ charset-normalizer==2.0.12
18
+ click==8.0.4
19
+ colorama==0.4.4
20
+ cryptography==36.0.1
21
+ cycler==0.11.0
22
+ decorator==5.1.1
23
+ fastapi==0.74.0
24
+ ffmpy==0.3.0
25
+ flatbuffers==2.0
26
+ fonttools==4.29.1
27
+ frozenlist==1.3.0
28
+ gast==0.5.3
29
+ google-auth==2.6.0
30
+ google-auth-oauthlib==0.4.6
31
+ google-pasta==0.2.0
32
+ gradio==2.8.1
33
+ grpcio==1.44.0
34
+ h11==0.13.0
35
+ h5py==3.6.0
36
+ idna==3.3
37
+ importlib-metadata==4.11.1
38
+ Jinja2==3.0.3
39
+ joblib==1.1.0
40
+ keras==2.8.0
41
+ Keras-Preprocessing==1.1.2
42
+ kiwisolver==1.3.2
43
+ libclang==13.0.0
44
+ librosa==0.9.1
45
+ linkify-it-py==1.0.3
46
+ llvmlite==0.38.0
47
+ Markdown==3.3.6
48
+ markdown-it-py==2.0.1
49
+ MarkupSafe==2.1.0
50
+ matplotlib==3.5.1
51
+ mdit-py-plugins==0.3.0
52
+ mdurl==0.1.0
53
+ monotonic==1.6
54
+ multidict==6.0.2
55
+ numba==0.55.1
56
+ numpy==1.21.5
57
+ oauthlib==3.2.0
58
+ opt-einsum==3.3.0
59
+ packaging==21.3
60
+ pandas==1.4.1
61
+ paramiko==2.9.2
62
+ Pillow==9.0.1
63
+ pooch==1.6.0
64
+ protobuf==3.19.4
65
+ pyasn1==0.4.8
66
+ pyasn1-modules==0.2.8
67
+ pycparser==2.21
68
+ pycryptodome==3.14.1
69
+ pydantic==1.9.0
70
+ pydub==0.25.1
71
+ PyNaCl==1.5.0
72
+ pyparsing==3.0.7
73
+ pyspark==3.2.0
74
+ python-dateutil==2.8.2
75
+ python-multipart==0.0.5
76
+ pytz==2021.3
77
+ requests==2.27.1
78
+ requests-oauthlib==1.3.1
79
+ resampy==0.2.2
80
+ rsa==4.8
81
+ scikit-learn==1.0.2
82
+ scipy==1.8.0
83
+ six==1.16.0
84
+ sniffio==1.2.0
85
+ SoundFile==0.10.3.post1
86
+ starlette==0.17.1
87
+ tensorboard==2.8.0
88
+ tensorboard-data-server==0.6.1
89
+ tensorboard-plugin-wit==1.8.1
90
+ tensorflow==2.8.0
91
+ tensorflow-io-gcs-filesystem==0.24.0
92
+ termcolor==1.1.0
93
+ tf-estimator-nightly==2.8.0.dev2021122109
94
+ threadpoolctl==3.1.0
95
+ typing_extensions==4.1.1
96
+ uc-micro-py==1.0.1
97
+ urllib3==1.26.8
98
+ uvicorn==0.17.5
99
+ Werkzeug==2.0.3
100
+ wrapt==1.13.3
101
+ yarl==1.7.2
102
+ zipp==3.7.0