diff --git a/src/config.py b/src/config.py index 69d527317c427a07a499154829130721ca5c6332..89414bc60877914a6ca266470113447d32a5c186 100644 --- a/src/config.py +++ b/src/config.py @@ -11,7 +11,7 @@ if not HF_API_KEY: # Labels d'émotions LABELS = {"colere": 0, "neutre": 1, "joie": 2} -LABELS = ["colere", "neutre", "joie"] +#LABELS = ["colere", "neutre", "joie"] NUM_LABELS = len(LABELS) # Choisir le device diff --git a/src/data/colere/c1ac.wav b/src/data/colere/c1ac.wav deleted file mode 100644 index de0b8dd9019ccc951cd8629881ce4ca8d25b3ec0..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1ac.wav and /dev/null differ diff --git a/src/data/colere/c1af.wav b/src/data/colere/c1af.wav deleted file mode 100644 index e1967a9476d751deeee0829c8243ad75926e2925..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1af.wav and /dev/null differ diff --git a/src/data/colere/c1aj.wav b/src/data/colere/c1aj.wav deleted file mode 100644 index c09590a452eb4b644898fa81fd7eef622cc154a3..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1aj.wav and /dev/null differ diff --git a/src/data/colere/c1an.wav b/src/data/colere/c1an.wav deleted file mode 100644 index 06686c906533609dba6f3f6e1d55027914c67d46..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1an.wav and /dev/null differ diff --git a/src/data/colere/c1bc.wav b/src/data/colere/c1bc.wav deleted file mode 100644 index 4c44ff67816c74daeea7c5fda572bb3337101bf0..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1bc.wav and /dev/null differ diff --git a/src/data/colere/c1bf.wav b/src/data/colere/c1bf.wav deleted file mode 100644 index 0bf087d6b04b648c2ac2a11c02a6449e5de39b06..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1bf.wav and /dev/null differ diff --git a/src/data/colere/c1bj.wav b/src/data/colere/c1bj.wav deleted file mode 100644 index 952b13d3c04eba087665f3dedfcbc36c20ecf6f7..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1bj.wav and /dev/null differ diff --git a/src/data/colere/c1bn.wav b/src/data/colere/c1bn.wav deleted file mode 100644 index 33ec132651db3818265b96205f6a278e672b876a..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1bn.wav and /dev/null differ diff --git a/src/data/colere/c1cc.wav b/src/data/colere/c1cc.wav deleted file mode 100644 index 8ff50bc7acb263721982227aa3b5232311e7755f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1cc.wav and /dev/null differ diff --git a/src/data/colere/c1cf.wav b/src/data/colere/c1cf.wav deleted file mode 100644 index e4d6d8020307b58a16a8d21702929ff83250431b..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1cf.wav and /dev/null differ diff --git a/src/data/colere/c1cj.wav b/src/data/colere/c1cj.wav deleted file mode 100644 index 06aa30452f84fc6802156817c6916352fab369a3..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c1cj.wav and /dev/null differ diff --git a/src/data/colere/c2ac.wav b/src/data/colere/c2ac.wav deleted file mode 100644 index 524dca5257d009affa31a3114ec9e6f8ab2ad59d..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2ac.wav and /dev/null differ diff --git a/src/data/colere/c2af.wav b/src/data/colere/c2af.wav deleted file mode 100644 index 6560815da0edef8f5dafab89a79c257630a2ee8c..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2af.wav and /dev/null differ diff --git a/src/data/colere/c2aj.wav b/src/data/colere/c2aj.wav deleted file mode 100644 index 8b3072eedb4811913f970f3560c15f1f7b6dc606..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2aj.wav and /dev/null differ diff --git a/src/data/colere/c2an.wav b/src/data/colere/c2an.wav deleted file mode 100644 index a6cf62bcc41f3b0b14d21a798f97eb852a44d77b..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2an.wav and /dev/null differ diff --git a/src/data/colere/c2bc.wav b/src/data/colere/c2bc.wav deleted file mode 100644 index d1404728c8d82cfa448cb473334f4299f85186ef..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bc.wav and /dev/null differ diff --git a/src/data/colere/c2bf.wav b/src/data/colere/c2bf.wav deleted file mode 100644 index 7916f8445a7e7e9c5e788d5086482b98c2de2a3b..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bf.wav and /dev/null differ diff --git a/src/data/colere/c2bj.wav b/src/data/colere/c2bj.wav deleted file mode 100644 index e75a7815503da5795e0c2ec0f0e7a38f4a4a7726..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bj.wav and /dev/null differ diff --git a/src/data/colere/c2bn.wav b/src/data/colere/c2bn.wav deleted file mode 100644 index 3572ffb0f9ee723c468d64c0560d9fa46c9e7453..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2bn.wav and /dev/null differ diff --git a/src/data/colere/c2cn.wav b/src/data/colere/c2cn.wav deleted file mode 100644 index c5669792e40ab64ce51fd7552875a239f5aa0d21..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c2cn.wav and /dev/null differ diff --git a/src/data/colere/c3ac.wav b/src/data/colere/c3ac.wav deleted file mode 100644 index fdbbafaddb9b7f7b5b26c254503d87efd1907143..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3ac.wav and /dev/null differ diff --git a/src/data/colere/c3af.wav b/src/data/colere/c3af.wav deleted file mode 100644 index c910e20f1768dd107f8f64d9e639b3a0335b5c4f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3af.wav and /dev/null differ diff --git a/src/data/colere/c3aj.wav b/src/data/colere/c3aj.wav deleted file mode 100644 index 2690efc1d43fb0d5e375f27612b23b877af5d52c..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3aj.wav and /dev/null differ diff --git a/src/data/colere/c3an.wav b/src/data/colere/c3an.wav deleted file mode 100644 index cdb7dfae2514e330065534479d93c8fdcefc8f32..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3an.wav and /dev/null differ diff --git a/src/data/colere/c3bc.wav b/src/data/colere/c3bc.wav deleted file mode 100644 index ae2ad5df7eeb3af635d79f98706d7f54e097aeca..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bc.wav and /dev/null differ diff --git a/src/data/colere/c3bf.wav b/src/data/colere/c3bf.wav deleted file mode 100644 index 2373fadc59c20c3b54461b2ab0c0e28572f9a895..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bf.wav and /dev/null differ diff --git a/src/data/colere/c3bj.wav b/src/data/colere/c3bj.wav deleted file mode 100644 index 2f69d7b93fb02052e0511ed1e169b4112a092f31..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bj.wav and /dev/null differ diff --git a/src/data/colere/c3bn.wav b/src/data/colere/c3bn.wav deleted file mode 100644 index d07cf0cd5705af273aa2f0e07f271c49c755caf7..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c3bn.wav and /dev/null differ diff --git a/src/data/colere/c4aaf.wav b/src/data/colere/c4aaf.wav deleted file mode 100644 index 5d907e2c92977e996324b5a53e112a3069356127..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4aaf.wav and /dev/null differ diff --git a/src/data/colere/c4ac.wav b/src/data/colere/c4ac.wav deleted file mode 100644 index 5d3366b06e894f00a0c5e6e50dcbbfc8bbbfa68f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4ac.wav and /dev/null differ diff --git a/src/data/colere/c4af.wav b/src/data/colere/c4af.wav deleted file mode 100644 index 9773ee4395de583ae211f0ad2a128aaf08c0e1ac..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4af.wav and /dev/null differ diff --git a/src/data/colere/c4aj.wav b/src/data/colere/c4aj.wav deleted file mode 100644 index 3d78610dcdfcb552a5294560aaef45c569a469ae..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4aj.wav and /dev/null differ diff --git a/src/data/colere/c4an.wav b/src/data/colere/c4an.wav deleted file mode 100644 index ef8fe80cc9f42dc56a85e2c8a55d627f62538cbf..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4an.wav and /dev/null differ diff --git a/src/data/colere/c4bc.wav b/src/data/colere/c4bc.wav deleted file mode 100644 index 68e71bffe723e4190a9f0b2036e047060a8abdf2..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4bc.wav and /dev/null differ diff --git a/src/data/colere/c4bj.wav b/src/data/colere/c4bj.wav deleted file mode 100644 index 6d761876bba59855bf62305c427f2d2cf90afd8c..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4bj.wav and /dev/null differ diff --git a/src/data/colere/c4bn.wav b/src/data/colere/c4bn.wav deleted file mode 100644 index 70a8946f12f8d81aac745600413fb2116ad7b9ef..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c4bn.wav and /dev/null differ diff --git a/src/data/colere/c5an.wav b/src/data/colere/c5an.wav deleted file mode 100644 index e2dad6a8fbfbe716a7f156dc80de5e0d0547730f..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5an.wav and /dev/null differ diff --git a/src/data/colere/c5c.wav b/src/data/colere/c5c.wav deleted file mode 100644 index 30d81f2fb02d01b39e401cf8bf61a96693189d45..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5c.wav and /dev/null differ diff --git a/src/data/colere/c5f.wav b/src/data/colere/c5f.wav deleted file mode 100644 index 2ff974fedca62f277e712c8e5ab81a73ffe7bb78..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5f.wav and /dev/null differ diff --git a/src/data/colere/c5j.wav b/src/data/colere/c5j.wav deleted file mode 100644 index 56876114625a8ff283758447a31a4e39be1194be..0000000000000000000000000000000000000000 Binary files a/src/data/colere/c5j.wav and /dev/null differ diff --git a/src/data/joie/h1ac.wav b/src/data/joie/h1ac.wav deleted file mode 100644 index 365b5e33c61769264184e3dc6500bd4c3bc97d27..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1ac.wav and /dev/null differ diff --git a/src/data/joie/h1af.wav b/src/data/joie/h1af.wav deleted file mode 100644 index d1e83a0dc15c85292082dafffa7e42f82ac442ba..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1af.wav and /dev/null differ diff --git a/src/data/joie/h1aj.wav b/src/data/joie/h1aj.wav deleted file mode 100644 index b5026f846cba16f5644f657bbd78cf0f53c48d47..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1aj.wav and /dev/null differ diff --git a/src/data/joie/h1an.wav b/src/data/joie/h1an.wav deleted file mode 100644 index 9f5738a18afb016c678c054e87fd64d9e86d7683..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1an.wav and /dev/null differ diff --git a/src/data/joie/h1bc.wav b/src/data/joie/h1bc.wav deleted file mode 100644 index bb046d0f5f400db7aa18d7bf58cec04e7fd6c894..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bc.wav and /dev/null differ diff --git a/src/data/joie/h1bf.wav b/src/data/joie/h1bf.wav deleted file mode 100644 index 315810b5fcbee4409ac682d86bb69b91d6c565bc..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bf.wav and /dev/null differ diff --git a/src/data/joie/h1bj.wav b/src/data/joie/h1bj.wav deleted file mode 100644 index 8e0fedd2096f1056fda0da1b8bb3914bf264a200..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bj.wav and /dev/null differ diff --git a/src/data/joie/h1bn.wav b/src/data/joie/h1bn.wav deleted file mode 100644 index 1ce54c92e6539c176ab10ef10ab4d006d2dfc0b2..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h1bn.wav and /dev/null differ diff --git a/src/data/joie/h21f.wav b/src/data/joie/h21f.wav deleted file mode 100644 index 5d7a790132fb070d93e575f811dac8edd1bd5427..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h21f.wav and /dev/null differ diff --git a/src/data/joie/h2ac.wav b/src/data/joie/h2ac.wav deleted file mode 100644 index 33791d25b7c5eea0d6961e814c6d4262f5027a95..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2ac.wav and /dev/null differ diff --git a/src/data/joie/h2aj.wav b/src/data/joie/h2aj.wav deleted file mode 100644 index e90c82f5049ad1e06422cad974f3f21394cab115..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2aj.wav and /dev/null differ diff --git a/src/data/joie/h2an.wav b/src/data/joie/h2an.wav deleted file mode 100644 index d648fcab9d2d0d8a19d531001531242de3a7cfcb..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2an.wav and /dev/null differ diff --git a/src/data/joie/h2bc.wav b/src/data/joie/h2bc.wav deleted file mode 100644 index 07bb8b097d742140b24e511d062f0fddff8c0a3b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bc.wav and /dev/null differ diff --git a/src/data/joie/h2bf.wav b/src/data/joie/h2bf.wav deleted file mode 100644 index 9031d5e7b4272cd28394f90a3ad026c74786ee9b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bf.wav and /dev/null differ diff --git a/src/data/joie/h2bj.wav b/src/data/joie/h2bj.wav deleted file mode 100644 index d83e34bace9a886c1313ee31b108fe789f444b27..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bj.wav and /dev/null differ diff --git a/src/data/joie/h2bn.wav b/src/data/joie/h2bn.wav deleted file mode 100644 index acdb5f6afd5460a0e4b7ab355b5516a2eba5ad3b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h2bn.wav and /dev/null differ diff --git a/src/data/joie/h3ac.wav b/src/data/joie/h3ac.wav deleted file mode 100644 index 5e264c1a29951abb40c7d0200cbe7f54e22defc6..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3ac.wav and /dev/null differ diff --git a/src/data/joie/h3af.wav b/src/data/joie/h3af.wav deleted file mode 100644 index c761bf682554c77809e4639fbc1229a431d22a6b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3af.wav and /dev/null differ diff --git a/src/data/joie/h3aj.wav b/src/data/joie/h3aj.wav deleted file mode 100644 index 72cb0ab63a2ce21685e5b87a8cb634d58fe77478..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3aj.wav and /dev/null differ diff --git a/src/data/joie/h3anwav.wav b/src/data/joie/h3anwav.wav deleted file mode 100644 index c7b60bb34eb44c992900e936ae2baa50555bb9fa..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3anwav.wav and /dev/null differ diff --git a/src/data/joie/h3bc.wav b/src/data/joie/h3bc.wav deleted file mode 100644 index c0a014783eb8ffe2b12eb4bfb8efa7138df888b2..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bc.wav and /dev/null differ diff --git a/src/data/joie/h3bf.wav b/src/data/joie/h3bf.wav deleted file mode 100644 index c3e14c3c82b0a1a00d4d4770bc9ec0b4df2469a7..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bf.wav and /dev/null differ diff --git a/src/data/joie/h3bj.wav b/src/data/joie/h3bj.wav deleted file mode 100644 index 656b23179c5d8af1cf57d609e2a0e338080f5073..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bj.wav and /dev/null differ diff --git a/src/data/joie/h3bn.wav b/src/data/joie/h3bn.wav deleted file mode 100644 index a9d47024320f98f0115883b1d0f1921e47bf87a1..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h3bn.wav and /dev/null differ diff --git a/src/data/joie/h4ac.wav b/src/data/joie/h4ac.wav deleted file mode 100644 index ccdee2f3801542e054af868e734e4d5872963fe9..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4ac.wav and /dev/null differ diff --git a/src/data/joie/h4af.wav b/src/data/joie/h4af.wav deleted file mode 100644 index 6498be3632495172f0507994b062b61308f73833..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4af.wav and /dev/null differ diff --git a/src/data/joie/h4aj.wav b/src/data/joie/h4aj.wav deleted file mode 100644 index 654cd2ec6b3a64dd6cedf0aa4b8a808b387be550..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4aj.wav and /dev/null differ diff --git a/src/data/joie/h4an.wav b/src/data/joie/h4an.wav deleted file mode 100644 index 3d5b734baf2c6b9638fbf2f28fe31be099ed980c..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4an.wav and /dev/null differ diff --git a/src/data/joie/h4bc.wav b/src/data/joie/h4bc.wav deleted file mode 100644 index 2cb72dad7485a7f0503490a28ff69d20414d4de1..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bc.wav and /dev/null differ diff --git a/src/data/joie/h4bf.wav b/src/data/joie/h4bf.wav deleted file mode 100644 index b3a3cf9bc1453b4b712491e995f8cda8acb654d0..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bf.wav and /dev/null differ diff --git a/src/data/joie/h4bj.wav b/src/data/joie/h4bj.wav deleted file mode 100644 index 3bf1943162af54a1765e5ca923cf86ad6a7aac60..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bj.wav and /dev/null differ diff --git a/src/data/joie/h4bn.wav b/src/data/joie/h4bn.wav deleted file mode 100644 index fa0da93f23242b1276b1a4fe1bcd16f7d240750b..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h4bn.wav and /dev/null differ diff --git a/src/data/joie/h5an.wav b/src/data/joie/h5an.wav deleted file mode 100644 index 9a1b1ae25e0b597b1dcb1c798a2b1a04758ad31d..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5an.wav and /dev/null differ diff --git a/src/data/joie/h5c.wav b/src/data/joie/h5c.wav deleted file mode 100644 index 57c8e9ac7c6b312fa7de327493946647e4d48f42..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5c.wav and /dev/null differ diff --git a/src/data/joie/h5f.wav b/src/data/joie/h5f.wav deleted file mode 100644 index 6591f721de5d8529e7d7e3ebd914ef18a04a1f29..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5f.wav and /dev/null differ diff --git a/src/data/joie/h5j.wav b/src/data/joie/h5j.wav deleted file mode 100644 index 438254f915751beeea4991fa989f26548faec30f..0000000000000000000000000000000000000000 Binary files a/src/data/joie/h5j.wav and /dev/null differ diff --git a/src/data/neutre/n1ac.wav b/src/data/neutre/n1ac.wav deleted file mode 100644 index df8888182b1e7d26b7208736c00a4e4c827b2ed9..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1ac.wav and /dev/null differ diff --git a/src/data/neutre/n1af.wav b/src/data/neutre/n1af.wav deleted file mode 100644 index f67dc5106ff4a363629e9e5759ea5a5117d0662d..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1af.wav and /dev/null differ diff --git a/src/data/neutre/n1aj.wav b/src/data/neutre/n1aj.wav deleted file mode 100644 index e178b9938741c53c5e6298ad92425d63d11983d3..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1aj.wav and /dev/null differ diff --git a/src/data/neutre/n1an.wav b/src/data/neutre/n1an.wav deleted file mode 100644 index 01585a6924876a4a6c18c80d1aaa02d27f0d2c3f..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1an.wav and /dev/null differ diff --git a/src/data/neutre/n1bc.wav b/src/data/neutre/n1bc.wav deleted file mode 100644 index bb910de068a5ff76ac37010df55e6d8691079585..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bc.wav and /dev/null differ diff --git a/src/data/neutre/n1bf.wav b/src/data/neutre/n1bf.wav deleted file mode 100644 index be4c834d19f22fe3d297639db4343bc411c2bbac..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bf.wav and /dev/null differ diff --git a/src/data/neutre/n1bj.wav b/src/data/neutre/n1bj.wav deleted file mode 100644 index 8ede3179d45f938e574ac1dca63c84f4165220a4..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bj.wav and /dev/null differ diff --git a/src/data/neutre/n1bn.wav b/src/data/neutre/n1bn.wav deleted file mode 100644 index 06df31510bc66528c77b294f52c97f268ce87e23..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n1bn.wav and /dev/null differ diff --git a/src/data/neutre/n2ac.wav b/src/data/neutre/n2ac.wav deleted file mode 100644 index feeaeebe063373ec1b64a325ce0717f5758339f6..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2ac.wav and /dev/null differ diff --git a/src/data/neutre/n2af.wav b/src/data/neutre/n2af.wav deleted file mode 100644 index 55a54bd3e58e65a57b44e56baca1e287f362ecc6..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2af.wav and /dev/null differ diff --git a/src/data/neutre/n2aj.wav b/src/data/neutre/n2aj.wav deleted file mode 100644 index 59f93bdbf838fba19ba0e7d0816b598b81bd4184..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2aj.wav and /dev/null differ diff --git a/src/data/neutre/n2an.wav b/src/data/neutre/n2an.wav deleted file mode 100644 index 4d93413421a30eb760a8dfabb532230aecaee3c6..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2an.wav and /dev/null differ diff --git a/src/data/neutre/n2bc.wav b/src/data/neutre/n2bc.wav deleted file mode 100644 index 6f7f9575cda53160b02d5a69f5cc220ab9bfb96a..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bc.wav and /dev/null differ diff --git a/src/data/neutre/n2bf.wav b/src/data/neutre/n2bf.wav deleted file mode 100644 index 741ac264c281f98c69c45618b353606708b5b8b4..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bf.wav and /dev/null differ diff --git a/src/data/neutre/n2bj.wav b/src/data/neutre/n2bj.wav deleted file mode 100644 index 89c347dee5c5eb81591d2f6020beabd9af187042..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bj.wav and /dev/null differ diff --git a/src/data/neutre/n2bn.wav b/src/data/neutre/n2bn.wav deleted file mode 100644 index aa4ed5ac0d68bff9e35974749f1ae32af974146c..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n2bn.wav and /dev/null differ diff --git a/src/data/neutre/n3ac.wav b/src/data/neutre/n3ac.wav deleted file mode 100644 index d7820cff9112b655ff20a0af7c0f698ffed13473..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3ac.wav and /dev/null differ diff --git a/src/data/neutre/n3af.wav b/src/data/neutre/n3af.wav deleted file mode 100644 index 68f96a78afbe6e489cd9890e691a365fb6a48b45..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3af.wav and /dev/null differ diff --git a/src/data/neutre/n3aj.wav b/src/data/neutre/n3aj.wav deleted file mode 100644 index f773b81d7817332c75d92b5ef3de238444dbf528..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3aj.wav and /dev/null differ diff --git a/src/data/neutre/n3an.wav b/src/data/neutre/n3an.wav deleted file mode 100644 index 7f878937d9bdb690ce0d7744608a2ddaa3030f58..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3an.wav and /dev/null differ diff --git a/src/data/neutre/n3bc.wav b/src/data/neutre/n3bc.wav deleted file mode 100644 index 805840dc276cc7dd3e586b427618ce52dadb5961..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bc.wav and /dev/null differ diff --git a/src/data/neutre/n3bf.wav b/src/data/neutre/n3bf.wav deleted file mode 100644 index 25a9b796b52624bf40f48db62d8af1e075abd413..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bf.wav and /dev/null differ diff --git a/src/data/neutre/n3bj.wav b/src/data/neutre/n3bj.wav deleted file mode 100644 index 100191833e4b7b6ac0d873f5392726e68cb1df80..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bj.wav and /dev/null differ diff --git a/src/data/neutre/n3bn.wav b/src/data/neutre/n3bn.wav deleted file mode 100644 index a8b8747acf885991076db4d1e62a431397021b8c..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n3bn.wav and /dev/null differ diff --git a/src/data/neutre/n4ac.wav b/src/data/neutre/n4ac.wav deleted file mode 100644 index aa52669b5afcd60c00cc5850e322a8a8f9011d3c..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4ac.wav and /dev/null differ diff --git a/src/data/neutre/n4aj.wav b/src/data/neutre/n4aj.wav deleted file mode 100644 index 50675d1f6e73f1bff0c768c554549491bd69b08f..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4aj.wav and /dev/null differ diff --git a/src/data/neutre/n4an.wav b/src/data/neutre/n4an.wav deleted file mode 100644 index e475e8e5bb0d12eb41cc0464e48f8172db58409b..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4an.wav and /dev/null differ diff --git a/src/data/neutre/n4f.wav b/src/data/neutre/n4f.wav deleted file mode 100644 index f04549721cb9271f4a2376c65589f2fdd99e0118..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n4f.wav and /dev/null differ diff --git a/src/data/neutre/n5ac.wav b/src/data/neutre/n5ac.wav deleted file mode 100644 index a525f07fc015351d0bcf47265179c83f4e827784..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5ac.wav and /dev/null differ diff --git a/src/data/neutre/n5af.wav b/src/data/neutre/n5af.wav deleted file mode 100644 index 232357862a49cb8a0c85fa1d2f9f14562c97d9d0..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5af.wav and /dev/null differ diff --git a/src/data/neutre/n5aj.wav b/src/data/neutre/n5aj.wav deleted file mode 100644 index 68f9c0e8a52504e82c7c5f1bee45676f77076fdd..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5aj.wav and /dev/null differ diff --git a/src/data/neutre/n5an.wav b/src/data/neutre/n5an.wav deleted file mode 100644 index 6f6b32117e4dc5405925ad7ea23c4f97dacca673..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5an.wav and /dev/null differ diff --git a/src/data/neutre/n5bc.wav b/src/data/neutre/n5bc.wav deleted file mode 100644 index da2385a2cce740655e6b23aec563fe4ca0793945..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bc.wav and /dev/null differ diff --git a/src/data/neutre/n5bf.wav b/src/data/neutre/n5bf.wav deleted file mode 100644 index f9c23dbfa6511a770362b5b0330512dafe38096e..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bf.wav and /dev/null differ diff --git a/src/data/neutre/n5bj.wav b/src/data/neutre/n5bj.wav deleted file mode 100644 index d999c3d54d8b17347fa81d74491e2ac64e672209..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bj.wav and /dev/null differ diff --git a/src/data/neutre/n5bn.wav b/src/data/neutre/n5bn.wav deleted file mode 100644 index 039b3a3b1ae7e27418e74ea0959da94aab27c3a5..0000000000000000000000000000000000000000 Binary files a/src/data/neutre/n5bn.wav and /dev/null differ diff --git a/src/model/emotion_classifier.py b/src/model/emotion_classifier.py index 2bdc8635b4b97f593e548b4843c0efb8eb8fee00..41252230e808acd74f00e213717691c3a2cfa4c1 100644 --- a/src/model/emotion_classifier.py +++ b/src/model/emotion_classifier.py @@ -22,11 +22,14 @@ import torch.nn as nn class EmotionClassifier(nn.Module): def __init__(self, feature_dim, num_labels=3): super(EmotionClassifier, self).__init__() - self.fc = nn.Linear(feature_dim.config.hidden_size, num_labels) - self.softmax = nn.Softmax(dim=1) + self.fc = nn.Linear(feature_dim, num_labels) + self.dropout = nn.Dropout(0.3) # Evite l'overfitting - def forward(self, input_values): - outputs = self(input_values).last_hidden_state - pooled_output = torch.mean(outputs, dim=1) + def forward(self, x): + pooled_output = torch.mean(x, dim=1) # Moyenne des features audio + pooled_output = self.dropout(pooled_output) # Dropout avant classification logits = self.fc(pooled_output) - return self.softmax(logits) + return logits + + + diff --git a/src/model/feature_extractor.py b/src/model/feature_extractor.py index 16cb0dfe206444d53459862698e141d0011b176e..13d0b60995bdf465611a6b792b5b455e4e05c8e2 100644 --- a/src/model/feature_extractor.py +++ b/src/model/feature_extractor.py @@ -1,6 +1,6 @@ import torch from transformers import Wav2Vec2Model, Wav2Vec2Processor -from src.config import MODEL_NAME, DEVICE +from config import MODEL_NAME, DEVICE processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME) feature_extractor = Wav2Vec2Model.from_pretrained(MODEL_NAME).to(DEVICE) diff --git a/src/model/transcriber.py b/src/model/transcriber.py index 0919cd86edeb4fd587fad78adf9236326a7df711..90ca6d9f27690c6285016d06cbc60dbd9d08fc00 100644 --- a/src/model/transcriber.py +++ b/src/model/transcriber.py @@ -1,27 +1,35 @@ import os import torch -from transformers import Wav2Vec2Processor -from src.model.emotion_classifier import Wav2Vec2EmotionClassifier import librosa +from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC # Charger le modèle et le processeur device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -# if st. -processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53-french") -model = Wav2Vec2EmotionClassifier() -model.load_state_dict(torch.load(os.path.join("src","model","wav2vec2_emotion.pth"), map_location=torch.device('cpu')), strict=False) -model.to(device) +MODEL_NAME = "facebook/wav2vec2-large-xlsr-53-french" +processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME) +model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device) +model.eval() -def transcribe_audio(audio, sampling_rate=16000): - # Préparer les données d'entrée pour le modèle - input_values = processor(audio, sampling_rate=sampling_rate, return_tensors="pt").input_values +def transcribe_audio(audio_path, sampling_rate=16000): + # Charger l'audio + audio, sr = librosa.load(audio_path, sr=sampling_rate) - # Passer les données dans le modèle pour obtenir les logits + # Transformer l'audio en entrée pour le modèle + input_values = processor(audio, sampling_rate=sampling_rate, return_tensors="pt").input_values.to(device) + + # Obtenir les prédictions with torch.no_grad(): logits = model(input_values).logits # Décoder les prédictions en texte predicted_ids = torch.argmax(logits, dim=-1) transcription = processor.batch_decode(predicted_ids)[0] - return transcription \ No newline at end of file + return transcription + +# Exemple d'utilisation +if __name__ == "__main__": + base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data")) + audio_path = os.path.join(base_path, "colere", "c1af.wav") + texte = transcribe_audio(audio_path) + print(f"Transcription : {texte}") diff --git a/src/predict.py b/src/predict.py index 57aef7fd91ef67da9cf34a3bc3647182e7c79183..17c3af5b0716616294e4341369aad6afe326b342 100644 --- a/src/predict.py +++ b/src/predict.py @@ -1,63 +1,50 @@ import torch -import torchaudio import librosa -import soundfile as sf import numpy as np -from src.model.emotion_classifier import EmotionClassifier -from src.model.feature_extractor import feature_extractor, processor -from src.utils.preprocessing import resampler -from src.config import DEVICE, LABELS +from model.emotion_classifier import EmotionClassifier +from utils.preprocessing import collate_fn +from config import DEVICE, NUM_LABELS import os +# Charger le modèle entraîné +MODEL_PATH = "acc_model.pth" +feature_dim = 40 # Nombre de MFCCs utilisés +model = EmotionClassifier(feature_dim, NUM_LABELS).to(DEVICE) +model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE)) +model.eval() # Mode évaluation -# Charger le modèle sauvegardé -classifier = EmotionClassifier(feature_extractor.config.hidden_size, len(LABELS)).to(DEVICE) -classifier.load_state_dict(torch.load(os.path.join("src","model","best_emotion_model.pth"), map_location=torch.device(DEVICE)), strict=False) -classifier.eval() +# Fonction pour prédire l’émotion d’un fichier audio +def predict_emotion(audio_path, max_length=128): + # Charger l’audio + y, sr = librosa.load(audio_path, sr=16000) + # Extraire les MFCCs + mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40) -# Fonction de prédiction -def predict_emotion(speech, output_probs=False, sampling_rate=16000): - # Charger l'audio - # waveform, sample_rate = librosa.load(speech, sr=None) - # speech_audio, sample_rate = sf.read(speech, dtype="float32") - - # Rééchantillonnage si nécessaire - # if sample_rate != sampling_rate: - # speech = torch.tensor(speech).unsqueeze(0) - # speech = resampler(speech).squeeze(0).numpy() + # Ajuster la taille des MFCCs avec padding/troncature + if mfcc.shape[1] > max_length: + mfcc = mfcc[:, :max_length] # Tronquer si trop long + else: + pad_width = max_length - mfcc.shape[1] + mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant') - # Extraire les features - inputs = processor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True) - input_values = inputs.input_values.to(DEVICE) + # Convertir en tenseur PyTorch + input_tensor = torch.tensor(mfcc.T, dtype=torch.float32).unsqueeze(0).to(DEVICE) # (1, max_length, 40) + # Prédiction avec le modèle with torch.no_grad(): - features = feature_extractor(input_values).last_hidden_state.mean(dim=1) - logits = classifier(features) - - if output_probs: - # Appliquer softmax pour obtenir des probabilités - probabilities = torch.nn.functional.softmax(logits, dim=-1) - - # Convertir en numpy array et prendre le premier (et seul) élément - probabilities = probabilities[0].detach().cpu().numpy() - - # Créer un dictionnaire associant chaque émotion à sa probabilité - emotion_probabilities = {emotion: prob for emotion, prob in zip(LABELS, probabilities)} - # emotion_probabilities = {"emotions": [emotion for emotion in emotion_labels], - # "probabilities": [prob for prob in probabilities]} - return emotion_probabilities - else: - # Obtenir l'émotion la plus probable (i.e. la prédiction) - predicted_label = torch.argmax(logits, dim=-1).item() - emotion = LABELS[predicted_label] + logits = model(input_tensor) + predicted_class = torch.argmax(logits, dim=-1).item() + + # Définition des labels + LABELS = {0: "colère", 1: "neutre", 2: "joie"} + return LABELS[predicted_class] - return emotion -# Exemple d'utilisation -# if __name__ == "__main__": -# base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) -# audio_file = os.path.join(base_path, "colere", "c1ac.wav") -# emotion = predict_emotion(audio_file) -# print(f"🎤 L'émotion prédite est : {emotion}") +#Exemple d'utilisation +if __name__ == "__main__": + base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) + audio_file = os.path.join(base_path, "colere", "c1ac.wav") + emotion = predict_emotion(audio_file) + print(f"🎤 L'émotion prédite est : {emotion}") diff --git a/src/train.py b/src/train.py index 2a52f2f133f012734781219e2c0db42ba6352306..21a0a415da2fbdb18497ec31f98dae4cab027e6a 100644 --- a/src/train.py +++ b/src/train.py @@ -1,93 +1,86 @@ import torch import torch.optim as optim import torch.nn as nn +from torch.utils.data import DataLoader import numpy as np from sklearn.metrics import accuracy_score from utils.dataset import load_audio_data -from utils.preprocessing import preprocess_audio, prepare_features +from utils.preprocessing import preprocess_audio, prepare_features, collate_fn from model.emotion_classifier import EmotionClassifier -from model.feature_extrator import feature_extractor, processor from config import DEVICE, NUM_LABELS import os -# Charger les données +# 🔹 Charger les données data_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "data")) -print(f"data dir {data_dir}") ds = load_audio_data(data_dir) -# Prétraitement +# 🔹 Prétraitement des données ds = ds.map(preprocess_audio) +ds = ds.map(lambda batch: prepare_features(batch, max_length=128)) -# Ajustement de la longueur maximale -lengths = [len(sample["speech"]) for sample in ds] -max_length = int(np.percentile(lengths, 95)) - -ds = ds.map(lambda batch: prepare_features(batch, max_length)) - -# Séparation en train et test +# 🔹 Séparation en train et test ds = ds.train_test_split(test_size=0.2) train_ds, test_ds = ds["train"], ds["test"] -# Instancier le modèle -classifier = EmotionClassifier(feature_extractor.config.hidden_size, NUM_LABELS).to(DEVICE) +# 🔹 Création des DataLoaders avec `collate_fn` +train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, collate_fn=collate_fn) +test_loader = DataLoader(test_ds, batch_size=8, shuffle=False, collate_fn=collate_fn) + +# 🔹 Instancier le modèle +feature_dim = 40 # Nombre de MFCCs +classifier = EmotionClassifier(feature_dim, NUM_LABELS).to(DEVICE) -# Fonction d'entraînement -def train_classifier(classifier, train_ds, test_ds, epochs=20, batch_size=8): - optimizer = optim.AdamW(classifier.parameters(), lr=2e-5, weight_decay=0.01) +# 🔹 Fonction d'entraînement +def train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4): + optimizer = optim.AdamW(classifier.parameters(), lr=lr, weight_decay=0.01) loss_fn = nn.CrossEntropyLoss() best_accuracy = 0.0 for epoch in range(epochs): classifier.train() total_loss, correct = 0, 0 - batch_count = 0 - for i in range(0, len(train_ds), batch_size): - batch = train_ds[i: i + batch_size] + for inputs, labels in train_loader: + inputs, labels = inputs.to(DEVICE), labels.to(DEVICE) optimizer.zero_grad() - input_values = processor( - batch["speech"], - sampling_rate=16000, - return_tensors="pt", - padding=True, - truncation=True, - max_length=max_length - ).input_values.to(DEVICE) - - with torch.no_grad(): - features = feature_extractor(input_values).last_hidden_state.mean(dim=1) - - logits = classifier(features) - labels = torch.tensor(batch["label"], dtype=torch.long, device=DEVICE) - - if labels.numel() == 0: - continue - + logits = classifier(inputs) loss = loss_fn(logits, labels) + loss.backward() optimizer.step() total_loss += loss.item() correct += (logits.argmax(dim=-1) == labels).sum().item() - batch_count += 1 - train_acc = correct / len(train_ds) + train_acc = correct / len(train_loader.dataset) + # 🔹 Sauvegarde du meilleur modèle if train_acc > best_accuracy: best_accuracy = train_acc - torch.save({ - "classifier_state_dict": classifier.state_dict(), - "feature_extractor_state_dict": feature_extractor.state_dict(), - "processor": processor - }, "acc_model.pth") - print(f"Nouveau meilleur modèle sauvegardé ! Accuracy: {best_accuracy:.4f}") + torch.save(classifier.state_dict(), "acc_model.pth") + print(f"✅ Nouveau meilleur modèle sauvegardé ! Accuracy: {best_accuracy:.4f}") - print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss/batch_count:.4f} - Accuracy: {train_acc:.4f}") + print(f"📊 Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(train_loader):.4f} - Accuracy: {train_acc:.4f}") return classifier -# Lancer l'entraînement -trained_classifier = train_classifier(classifier, train_ds, test_ds, epochs=20, batch_size=8) +# 🔹 Fonction d'évaluation +def evaluate(model, test_loader): + model.eval() + all_preds, all_labels = [], [] + + with torch.no_grad(): + for inputs, labels in test_loader: + inputs, labels = inputs.to(DEVICE), labels.to(DEVICE) + logits = model(inputs) + preds = torch.argmax(logits, dim=-1).cpu().numpy() + all_preds.extend(preds) + all_labels.extend(labels.cpu().numpy()) + + return accuracy_score(all_labels, all_preds) + +# 🔹 Lancer l'entraînement +trained_classifier = train_classifier(classifier, train_loader, test_loader, epochs=20, lr=2e-4) print("✅ Entraînement terminé, le meilleur modèle a été sauvegardé !") diff --git a/src/utils/dataset.py b/src/utils/dataset.py index 1703739f79997950d747c00530c7b61d1e37ff7d..a275f0d98f1f0d1d936a323958ce99b2876b26e6 100644 --- a/src/utils/dataset.py +++ b/src/utils/dataset.py @@ -1,6 +1,7 @@ import os from datasets import Dataset from config import LABELS +import pandas as pd def load_audio_data(data_dir): data = [] @@ -11,3 +12,25 @@ def load_audio_data(data_dir): file_path = os.path.join(label_dir, file) data.append({"path": file_path, "label": label_id}) return Dataset.from_list(data) + + +# def load_audio_data_from_csv(csv_path, data_dir): +# data = [] +# df = pd.read_csv(csv_path, sep=",", header=0) +# print(df.head()) + +# for _, row in df.iterrows(): +# file_path = os.path.join(data_dir, row["dossier"]) +# label = row["emotion"] + +# if os.path.exists(file_path) and label in LABELS: +# data.append({"path": file_path, "label": LABELS[label]}) +# else: +# print(f"⚠️ Fichier manquant ou label inconnu : {file_path} - {label}") + +# return Dataset.from_list(data) + +# #Charger le dataset à partir du CSV +# csv_path = os.path.abspath(os.path.join(os.path.dirname(file), "new_data", "dataset.csv")) +# data_dir = os.path.abspath(os.path.join(os.path.dirname(file), "new_data")) +# ds = load_audio_data_from_csv(csv_path, data_dir) diff --git a/src/utils/preprocessing.py b/src/utils/preprocessing.py index 9e56d04f50c5cf6336a834d7d8de51c61ab8cd33..70cf9591b80bfad2bde60ee9502cbfd6bb0ee6d9 100644 --- a/src/utils/preprocessing.py +++ b/src/utils/preprocessing.py @@ -1,33 +1,82 @@ +import librosa import soundfile as sf import torch import torchaudio import numpy as np -from src.model.feature_extractor import processor # type: ignore -from src.config import DEVICE +from model.feature_extractor import processor # type: ignore +from config import DEVICE -# Resampler +# Resampler pour convertir en 16kHz resampler = torchaudio.transforms.Resample(orig_freq=48_000, new_freq=16_000) def preprocess_audio(batch): speech, sample_rate = sf.read(batch["path"], dtype="float32") + # Convertir en numpy array si ce n'est pas déjà le cas + speech = np.array(speech, dtype=np.float32) + + # Vérifier que le format est bien float32 + if speech.dtype != np.float32: + raise ValueError(f"Le fichier {batch['path']} n'est pas en float32.") + + # Resampling si nécessaire if sample_rate != 16000: - speech = torch.tensor(speech).unsqueeze(0) - speech = resampler(speech).squeeze(0).numpy() - - batch["speech"] = speech.tolist() + speech = torch.tensor(speech).unsqueeze(0) # Ajouter une dimension pour le resampler + speech = resampler(speech).squeeze(0).numpy() # Appliquer le resampler et reconvertir en numpy array + + batch["speech"] = speech batch["sampling_rate"] = 16000 return batch + def prepare_features(batch, max_length): - features = processor( - batch["speech"], - sampling_rate=16000, - padding=True, - truncation=True, - max_length=max_length, - return_tensors="pt" - ) - batch["input_values"] = features.input_values.squeeze(0) - batch["label"] = torch.tensor(batch["label"], dtype=torch.long) + y, sr = batch["speech"], 16000 + + # S'assurer que y est bien un numpy array float32 + if not isinstance(y, np.ndarray): + y = np.array(y, dtype=np.float32) + + # Vérifier que les valeurs sont bien en float32 + y = y.astype(np.float32) + + # Extraction des MFCCs + mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40) + + # Debugging: afficher la forme des MFCCs + print(f"MFCC original shape: {mfcc.shape}") + + # Ajuster la longueur des MFCCs + if mfcc.shape[1] > max_length: + mfcc = mfcc[:, :max_length] # Tronquer si trop long + else: + pad_width = max_length - mfcc.shape[1] + mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant') # Padding si trop court + + print(f"MFCC padded shape: {mfcc.shape}") + + # Convertir en tensor PyTorch et stocker + batch["input_values"] = torch.tensor(mfcc.T, dtype=torch.float32) # Transposer pour obtenir (max_length, 40) return batch + + +def collate_fn(batch): + """ Fonction pour assembler les batchs avec padding des features """ + + # Récupérer les features (MFCC) et les labels + inputs = [np.array(sample["input_values"], dtype=np.float32) for sample in batch] + labels = torch.tensor([sample["label"] for sample in batch], dtype=torch.long) + + # Vérifier que inputs est bien une liste de numpy arrays + #print(f"Types des inputs: {[type(x) for x in inputs]}") # Debugging + + # Trouver la longueur max des MFCC dans ce batch + max_length = max([x.shape[0] for x in inputs]) + + # Appliquer un padding avec des zéros pour uniformiser les tailles + padded_inputs = [np.pad(x, ((0, max_length - x.shape[0]), (0, 0)), mode="constant") for x in inputs] + + # Convertir en tenseur PyTorch + inputs_tensor = torch.tensor(padded_inputs, dtype=torch.float32) + + return inputs_tensor, labels +