Spaces:
Sleeping
Sleeping
feat: Implement alternative solution achieving functional goals
Browse files
This commit represents a mixed outcome. While the implementation successfully achieves the intended functionality, it diverges from the original plan. The solution, though unconventional, meets the necessary requirements and ensures operational effectiveness. Future revisions may align it more closely with the initial strategy.
- Cargo.lock +245 -18
- Cargo.toml +7 -1
- config/dev.yaml +17 -9
- ggml-metal.metal +0 -0
- src/config.rs +70 -32
- src/main.rs +3 -3
- src/whisper.rs +208 -106
Cargo.lock
CHANGED
@@ -37,6 +37,15 @@ dependencies = [
|
|
37 |
"memchr",
|
38 |
]
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
[[package]]
|
41 |
name = "anyhow"
|
42 |
version = "1.0.75"
|
@@ -62,7 +71,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
|
62 |
dependencies = [
|
63 |
"proc-macro2",
|
64 |
"quote",
|
65 |
-
"syn",
|
66 |
]
|
67 |
|
68 |
[[package]]
|
@@ -73,7 +82,18 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
|
|
73 |
dependencies = [
|
74 |
"proc-macro2",
|
75 |
"quote",
|
76 |
-
"syn",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
]
|
78 |
|
79 |
[[package]]
|
@@ -511,6 +531,29 @@ dependencies = [
|
|
511 |
"vsimd",
|
512 |
]
|
513 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
514 |
[[package]]
|
515 |
name = "bindgen"
|
516 |
version = "0.68.1"
|
@@ -518,7 +561,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
518 |
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
|
519 |
dependencies = [
|
520 |
"bitflags 2.4.1",
|
521 |
-
"cexpr",
|
522 |
"clang-sys",
|
523 |
"lazy_static",
|
524 |
"lazycell",
|
@@ -529,9 +572,9 @@ dependencies = [
|
|
529 |
"quote",
|
530 |
"regex",
|
531 |
"rustc-hash",
|
532 |
-
"shlex",
|
533 |
-
"syn",
|
534 |
-
"which",
|
535 |
]
|
536 |
|
537 |
[[package]]
|
@@ -586,13 +629,22 @@ dependencies = [
|
|
586 |
"libc",
|
587 |
]
|
588 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
589 |
[[package]]
|
590 |
name = "cexpr"
|
591 |
version = "0.6.0"
|
592 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
593 |
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
594 |
dependencies = [
|
595 |
-
"nom",
|
596 |
]
|
597 |
|
598 |
[[package]]
|
@@ -612,6 +664,21 @@ dependencies = [
|
|
612 |
"libloading",
|
613 |
]
|
614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
615 |
[[package]]
|
616 |
name = "cmake"
|
617 |
version = "0.1.50"
|
@@ -630,7 +697,7 @@ dependencies = [
|
|
630 |
"async-trait",
|
631 |
"json5",
|
632 |
"lazy_static",
|
633 |
-
"nom",
|
634 |
"pathdiff",
|
635 |
"ron",
|
636 |
"rust-ini",
|
@@ -722,6 +789,19 @@ version = "1.9.0"
|
|
722 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
723 |
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
725 |
[[package]]
|
726 |
name = "equivalent"
|
727 |
version = "1.0.1"
|
@@ -788,7 +868,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
|
|
788 |
dependencies = [
|
789 |
"proc-macro2",
|
790 |
"quote",
|
791 |
-
"syn",
|
792 |
]
|
793 |
|
794 |
[[package]]
|
@@ -818,6 +898,15 @@ dependencies = [
|
|
818 |
"slab",
|
819 |
]
|
820 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
821 |
[[package]]
|
822 |
name = "generic-array"
|
823 |
version = "0.14.7"
|
@@ -909,6 +998,15 @@ dependencies = [
|
|
909 |
"http",
|
910 |
]
|
911 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
912 |
[[package]]
|
913 |
name = "hermit-abi"
|
914 |
version = "0.3.3"
|
@@ -939,6 +1037,12 @@ dependencies = [
|
|
939 |
"windows-sys",
|
940 |
]
|
941 |
|
|
|
|
|
|
|
|
|
|
|
|
|
942 |
[[package]]
|
943 |
name = "http"
|
944 |
version = "0.2.9"
|
@@ -973,6 +1077,12 @@ version = "1.0.3"
|
|
973 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
974 |
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
975 |
|
|
|
|
|
|
|
|
|
|
|
|
|
976 |
[[package]]
|
977 |
name = "hyper"
|
978 |
version = "0.14.27"
|
@@ -1078,6 +1188,16 @@ version = "0.2.150"
|
|
1078 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1079 |
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
1080 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1081 |
[[package]]
|
1082 |
name = "libloading"
|
1083 |
version = "0.7.4"
|
@@ -1173,6 +1293,16 @@ dependencies = [
|
|
1173 |
"windows-sys",
|
1174 |
]
|
1175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1176 |
[[package]]
|
1177 |
name = "nom"
|
1178 |
version = "7.1.3"
|
@@ -1218,7 +1348,7 @@ version = "1.16.0"
|
|
1218 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1219 |
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
1220 |
dependencies = [
|
1221 |
-
"hermit-abi",
|
1222 |
"libc",
|
1223 |
]
|
1224 |
|
@@ -1337,7 +1467,7 @@ dependencies = [
|
|
1337 |
"pest_meta",
|
1338 |
"proc-macro2",
|
1339 |
"quote",
|
1340 |
-
"syn",
|
1341 |
]
|
1342 |
|
1343 |
[[package]]
|
@@ -1405,7 +1535,7 @@ dependencies = [
|
|
1405 |
"proc-macro-crate",
|
1406 |
"proc-macro2",
|
1407 |
"quote",
|
1408 |
-
"syn",
|
1409 |
]
|
1410 |
|
1411 |
[[package]]
|
@@ -1420,6 +1550,8 @@ dependencies = [
|
|
1420 |
"aws-sdk-translate",
|
1421 |
"config",
|
1422 |
"futures-util",
|
|
|
|
|
1423 |
"once_cell",
|
1424 |
"poem",
|
1425 |
"serde",
|
@@ -1429,6 +1561,7 @@ dependencies = [
|
|
1429 |
"tokio-stream",
|
1430 |
"tracing",
|
1431 |
"tracing-subscriber",
|
|
|
1432 |
"whisper-rs",
|
1433 |
"whisper-rs-sys",
|
1434 |
]
|
@@ -1452,7 +1585,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1452 |
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
|
1453 |
dependencies = [
|
1454 |
"proc-macro2",
|
1455 |
-
"syn",
|
1456 |
]
|
1457 |
|
1458 |
[[package]]
|
@@ -1764,7 +1897,7 @@ checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1"
|
|
1764 |
dependencies = [
|
1765 |
"proc-macro2",
|
1766 |
"quote",
|
1767 |
-
"syn",
|
1768 |
]
|
1769 |
|
1770 |
[[package]]
|
@@ -1834,6 +1967,12 @@ dependencies = [
|
|
1834 |
"lazy_static",
|
1835 |
]
|
1836 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1837 |
[[package]]
|
1838 |
name = "shlex"
|
1839 |
version = "1.2.0"
|
@@ -1890,12 +2029,29 @@ version = "0.9.8"
|
|
1890 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1891 |
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
1892 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1893 |
[[package]]
|
1894 |
name = "subtle"
|
1895 |
version = "2.5.0"
|
1896 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1897 |
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
1898 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1899 |
[[package]]
|
1900 |
name = "syn"
|
1901 |
version = "2.0.39"
|
@@ -1907,6 +2063,24 @@ dependencies = [
|
|
1907 |
"unicode-ident",
|
1908 |
]
|
1909 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1910 |
[[package]]
|
1911 |
name = "thiserror"
|
1912 |
version = "1.0.50"
|
@@ -1924,7 +2098,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
|
|
1924 |
dependencies = [
|
1925 |
"proc-macro2",
|
1926 |
"quote",
|
1927 |
-
"syn",
|
1928 |
]
|
1929 |
|
1930 |
[[package]]
|
@@ -2006,7 +2180,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
|
|
2006 |
dependencies = [
|
2007 |
"proc-macro2",
|
2008 |
"quote",
|
2009 |
-
"syn",
|
2010 |
]
|
2011 |
|
2012 |
[[package]]
|
@@ -2107,7 +2281,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
|
|
2107 |
dependencies = [
|
2108 |
"proc-macro2",
|
2109 |
"quote",
|
2110 |
-
"syn",
|
2111 |
]
|
2112 |
|
2113 |
[[package]]
|
@@ -2149,6 +2323,29 @@ dependencies = [
|
|
2149 |
"tracing-log",
|
2150 |
]
|
2151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2152 |
[[package]]
|
2153 |
name = "try-lock"
|
2154 |
version = "0.2.4"
|
@@ -2225,6 +2422,12 @@ dependencies = [
|
|
2225 |
"tinyvec",
|
2226 |
]
|
2227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
2228 |
[[package]]
|
2229 |
name = "unsafe-libyaml"
|
2230 |
version = "0.2.9"
|
@@ -2272,6 +2475,12 @@ version = "0.1.0"
|
|
2272 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2273 |
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
2274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
2275 |
[[package]]
|
2276 |
name = "version_check"
|
2277 |
version = "0.9.4"
|
@@ -2299,6 +2508,15 @@ version = "0.11.0+wasi-snapshot-preview1"
|
|
2299 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2300 |
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
2301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2302 |
[[package]]
|
2303 |
name = "which"
|
2304 |
version = "4.4.2"
|
@@ -2324,7 +2542,7 @@ name = "whisper-rs-sys"
|
|
2324 |
version = "0.7.3"
|
2325 |
source = "git+https://github.com/mingyang91/whisper-rs.git#f8e424a19b13cc348395afd862f0dcb864fcb1fc"
|
2326 |
dependencies = [
|
2327 |
-
"bindgen",
|
2328 |
"cfg-if",
|
2329 |
"cmake",
|
2330 |
"fs_extra",
|
@@ -2346,6 +2564,15 @@ version = "0.4.0"
|
|
2346 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2347 |
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
2348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2349 |
[[package]]
|
2350 |
name = "winapi-x86_64-pc-windows-gnu"
|
2351 |
version = "0.4.0"
|
|
|
37 |
"memchr",
|
38 |
]
|
39 |
|
40 |
+
[[package]]
|
41 |
+
name = "ansi_term"
|
42 |
+
version = "0.12.1"
|
43 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
44 |
+
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
|
45 |
+
dependencies = [
|
46 |
+
"winapi",
|
47 |
+
]
|
48 |
+
|
49 |
[[package]]
|
50 |
name = "anyhow"
|
51 |
version = "1.0.75"
|
|
|
71 |
dependencies = [
|
72 |
"proc-macro2",
|
73 |
"quote",
|
74 |
+
"syn 2.0.39",
|
75 |
]
|
76 |
|
77 |
[[package]]
|
|
|
82 |
dependencies = [
|
83 |
"proc-macro2",
|
84 |
"quote",
|
85 |
+
"syn 2.0.39",
|
86 |
+
]
|
87 |
+
|
88 |
+
[[package]]
|
89 |
+
name = "atty"
|
90 |
+
version = "0.2.14"
|
91 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
92 |
+
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
93 |
+
dependencies = [
|
94 |
+
"hermit-abi 0.1.19",
|
95 |
+
"libc",
|
96 |
+
"winapi",
|
97 |
]
|
98 |
|
99 |
[[package]]
|
|
|
531 |
"vsimd",
|
532 |
]
|
533 |
|
534 |
+
[[package]]
|
535 |
+
name = "bindgen"
|
536 |
+
version = "0.56.0"
|
537 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
538 |
+
checksum = "2da379dbebc0b76ef63ca68d8fc6e71c0f13e59432e0987e508c1820e6ab5239"
|
539 |
+
dependencies = [
|
540 |
+
"bitflags 1.3.2",
|
541 |
+
"cexpr 0.4.0",
|
542 |
+
"clang-sys",
|
543 |
+
"clap",
|
544 |
+
"env_logger",
|
545 |
+
"lazy_static",
|
546 |
+
"lazycell",
|
547 |
+
"log",
|
548 |
+
"peeking_take_while",
|
549 |
+
"proc-macro2",
|
550 |
+
"quote",
|
551 |
+
"regex",
|
552 |
+
"rustc-hash",
|
553 |
+
"shlex 0.1.1",
|
554 |
+
"which 3.1.1",
|
555 |
+
]
|
556 |
+
|
557 |
[[package]]
|
558 |
name = "bindgen"
|
559 |
version = "0.68.1"
|
|
|
561 |
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
|
562 |
dependencies = [
|
563 |
"bitflags 2.4.1",
|
564 |
+
"cexpr 0.6.0",
|
565 |
"clang-sys",
|
566 |
"lazy_static",
|
567 |
"lazycell",
|
|
|
572 |
"quote",
|
573 |
"regex",
|
574 |
"rustc-hash",
|
575 |
+
"shlex 1.2.0",
|
576 |
+
"syn 2.0.39",
|
577 |
+
"which 4.4.2",
|
578 |
]
|
579 |
|
580 |
[[package]]
|
|
|
629 |
"libc",
|
630 |
]
|
631 |
|
632 |
+
[[package]]
|
633 |
+
name = "cexpr"
|
634 |
+
version = "0.4.0"
|
635 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
636 |
+
checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
|
637 |
+
dependencies = [
|
638 |
+
"nom 5.1.3",
|
639 |
+
]
|
640 |
+
|
641 |
[[package]]
|
642 |
name = "cexpr"
|
643 |
version = "0.6.0"
|
644 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
645 |
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
646 |
dependencies = [
|
647 |
+
"nom 7.1.3",
|
648 |
]
|
649 |
|
650 |
[[package]]
|
|
|
664 |
"libloading",
|
665 |
]
|
666 |
|
667 |
+
[[package]]
|
668 |
+
name = "clap"
|
669 |
+
version = "2.34.0"
|
670 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
671 |
+
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
|
672 |
+
dependencies = [
|
673 |
+
"ansi_term",
|
674 |
+
"atty",
|
675 |
+
"bitflags 1.3.2",
|
676 |
+
"strsim",
|
677 |
+
"textwrap",
|
678 |
+
"unicode-width",
|
679 |
+
"vec_map",
|
680 |
+
]
|
681 |
+
|
682 |
[[package]]
|
683 |
name = "cmake"
|
684 |
version = "0.1.50"
|
|
|
697 |
"async-trait",
|
698 |
"json5",
|
699 |
"lazy_static",
|
700 |
+
"nom 7.1.3",
|
701 |
"pathdiff",
|
702 |
"ron",
|
703 |
"rust-ini",
|
|
|
789 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
790 |
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
791 |
|
792 |
+
[[package]]
|
793 |
+
name = "env_logger"
|
794 |
+
version = "0.8.4"
|
795 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
796 |
+
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
|
797 |
+
dependencies = [
|
798 |
+
"atty",
|
799 |
+
"humantime",
|
800 |
+
"log",
|
801 |
+
"regex",
|
802 |
+
"termcolor",
|
803 |
+
]
|
804 |
+
|
805 |
[[package]]
|
806 |
name = "equivalent"
|
807 |
version = "1.0.1"
|
|
|
868 |
dependencies = [
|
869 |
"proc-macro2",
|
870 |
"quote",
|
871 |
+
"syn 2.0.39",
|
872 |
]
|
873 |
|
874 |
[[package]]
|
|
|
898 |
"slab",
|
899 |
]
|
900 |
|
901 |
+
[[package]]
|
902 |
+
name = "fvad"
|
903 |
+
version = "0.1.3"
|
904 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
905 |
+
checksum = "8b8e04cf8731da968d9456575a0ae44cb8760dee46169a5289a0e87d4cc4743a"
|
906 |
+
dependencies = [
|
907 |
+
"libfvad-sys",
|
908 |
+
]
|
909 |
+
|
910 |
[[package]]
|
911 |
name = "generic-array"
|
912 |
version = "0.14.7"
|
|
|
998 |
"http",
|
999 |
]
|
1000 |
|
1001 |
+
[[package]]
|
1002 |
+
name = "hermit-abi"
|
1003 |
+
version = "0.1.19"
|
1004 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1005 |
+
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
1006 |
+
dependencies = [
|
1007 |
+
"libc",
|
1008 |
+
]
|
1009 |
+
|
1010 |
[[package]]
|
1011 |
name = "hermit-abi"
|
1012 |
version = "0.3.3"
|
|
|
1037 |
"windows-sys",
|
1038 |
]
|
1039 |
|
1040 |
+
[[package]]
|
1041 |
+
name = "hound"
|
1042 |
+
version = "3.5.1"
|
1043 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1044 |
+
checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f"
|
1045 |
+
|
1046 |
[[package]]
|
1047 |
name = "http"
|
1048 |
version = "0.2.9"
|
|
|
1077 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1078 |
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
1079 |
|
1080 |
+
[[package]]
|
1081 |
+
name = "humantime"
|
1082 |
+
version = "2.1.0"
|
1083 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1084 |
+
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
1085 |
+
|
1086 |
[[package]]
|
1087 |
name = "hyper"
|
1088 |
version = "0.14.27"
|
|
|
1188 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1189 |
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
1190 |
|
1191 |
+
[[package]]
|
1192 |
+
name = "libfvad-sys"
|
1193 |
+
version = "1.0.0"
|
1194 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1195 |
+
checksum = "473b5389760c65fab561600c78f609ee5779799ae8d29818eccea95d8a8c94d8"
|
1196 |
+
dependencies = [
|
1197 |
+
"bindgen 0.56.0",
|
1198 |
+
"cc",
|
1199 |
+
]
|
1200 |
+
|
1201 |
[[package]]
|
1202 |
name = "libloading"
|
1203 |
version = "0.7.4"
|
|
|
1293 |
"windows-sys",
|
1294 |
]
|
1295 |
|
1296 |
+
[[package]]
|
1297 |
+
name = "nom"
|
1298 |
+
version = "5.1.3"
|
1299 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1300 |
+
checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b"
|
1301 |
+
dependencies = [
|
1302 |
+
"memchr",
|
1303 |
+
"version_check",
|
1304 |
+
]
|
1305 |
+
|
1306 |
[[package]]
|
1307 |
name = "nom"
|
1308 |
version = "7.1.3"
|
|
|
1348 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1349 |
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
1350 |
dependencies = [
|
1351 |
+
"hermit-abi 0.3.3",
|
1352 |
"libc",
|
1353 |
]
|
1354 |
|
|
|
1467 |
"pest_meta",
|
1468 |
"proc-macro2",
|
1469 |
"quote",
|
1470 |
+
"syn 2.0.39",
|
1471 |
]
|
1472 |
|
1473 |
[[package]]
|
|
|
1535 |
"proc-macro-crate",
|
1536 |
"proc-macro2",
|
1537 |
"quote",
|
1538 |
+
"syn 2.0.39",
|
1539 |
]
|
1540 |
|
1541 |
[[package]]
|
|
|
1550 |
"aws-sdk-translate",
|
1551 |
"config",
|
1552 |
"futures-util",
|
1553 |
+
"fvad",
|
1554 |
+
"hound",
|
1555 |
"once_cell",
|
1556 |
"poem",
|
1557 |
"serde",
|
|
|
1561 |
"tokio-stream",
|
1562 |
"tracing",
|
1563 |
"tracing-subscriber",
|
1564 |
+
"tracing-test",
|
1565 |
"whisper-rs",
|
1566 |
"whisper-rs-sys",
|
1567 |
]
|
|
|
1585 |
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
|
1586 |
dependencies = [
|
1587 |
"proc-macro2",
|
1588 |
+
"syn 2.0.39",
|
1589 |
]
|
1590 |
|
1591 |
[[package]]
|
|
|
1897 |
dependencies = [
|
1898 |
"proc-macro2",
|
1899 |
"quote",
|
1900 |
+
"syn 2.0.39",
|
1901 |
]
|
1902 |
|
1903 |
[[package]]
|
|
|
1967 |
"lazy_static",
|
1968 |
]
|
1969 |
|
1970 |
+
[[package]]
|
1971 |
+
name = "shlex"
|
1972 |
+
version = "0.1.1"
|
1973 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1974 |
+
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
|
1975 |
+
|
1976 |
[[package]]
|
1977 |
name = "shlex"
|
1978 |
version = "1.2.0"
|
|
|
2029 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2030 |
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
2031 |
|
2032 |
+
[[package]]
|
2033 |
+
name = "strsim"
|
2034 |
+
version = "0.8.0"
|
2035 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2036 |
+
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
2037 |
+
|
2038 |
[[package]]
|
2039 |
name = "subtle"
|
2040 |
version = "2.5.0"
|
2041 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2042 |
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
2043 |
|
2044 |
+
[[package]]
|
2045 |
+
name = "syn"
|
2046 |
+
version = "1.0.109"
|
2047 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2048 |
+
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
2049 |
+
dependencies = [
|
2050 |
+
"proc-macro2",
|
2051 |
+
"quote",
|
2052 |
+
"unicode-ident",
|
2053 |
+
]
|
2054 |
+
|
2055 |
[[package]]
|
2056 |
name = "syn"
|
2057 |
version = "2.0.39"
|
|
|
2063 |
"unicode-ident",
|
2064 |
]
|
2065 |
|
2066 |
+
[[package]]
|
2067 |
+
name = "termcolor"
|
2068 |
+
version = "1.4.0"
|
2069 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2070 |
+
checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449"
|
2071 |
+
dependencies = [
|
2072 |
+
"winapi-util",
|
2073 |
+
]
|
2074 |
+
|
2075 |
+
[[package]]
|
2076 |
+
name = "textwrap"
|
2077 |
+
version = "0.11.0"
|
2078 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2079 |
+
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
2080 |
+
dependencies = [
|
2081 |
+
"unicode-width",
|
2082 |
+
]
|
2083 |
+
|
2084 |
[[package]]
|
2085 |
name = "thiserror"
|
2086 |
version = "1.0.50"
|
|
|
2098 |
dependencies = [
|
2099 |
"proc-macro2",
|
2100 |
"quote",
|
2101 |
+
"syn 2.0.39",
|
2102 |
]
|
2103 |
|
2104 |
[[package]]
|
|
|
2180 |
dependencies = [
|
2181 |
"proc-macro2",
|
2182 |
"quote",
|
2183 |
+
"syn 2.0.39",
|
2184 |
]
|
2185 |
|
2186 |
[[package]]
|
|
|
2281 |
dependencies = [
|
2282 |
"proc-macro2",
|
2283 |
"quote",
|
2284 |
+
"syn 2.0.39",
|
2285 |
]
|
2286 |
|
2287 |
[[package]]
|
|
|
2323 |
"tracing-log",
|
2324 |
]
|
2325 |
|
2326 |
+
[[package]]
|
2327 |
+
name = "tracing-test"
|
2328 |
+
version = "0.2.4"
|
2329 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2330 |
+
checksum = "3a2c0ff408fe918a94c428a3f2ad04e4afd5c95bbc08fcf868eff750c15728a4"
|
2331 |
+
dependencies = [
|
2332 |
+
"lazy_static",
|
2333 |
+
"tracing-core",
|
2334 |
+
"tracing-subscriber",
|
2335 |
+
"tracing-test-macro",
|
2336 |
+
]
|
2337 |
+
|
2338 |
+
[[package]]
|
2339 |
+
name = "tracing-test-macro"
|
2340 |
+
version = "0.2.4"
|
2341 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2342 |
+
checksum = "258bc1c4f8e2e73a977812ab339d503e6feeb92700f6d07a6de4d321522d5c08"
|
2343 |
+
dependencies = [
|
2344 |
+
"lazy_static",
|
2345 |
+
"quote",
|
2346 |
+
"syn 1.0.109",
|
2347 |
+
]
|
2348 |
+
|
2349 |
[[package]]
|
2350 |
name = "try-lock"
|
2351 |
version = "0.2.4"
|
|
|
2422 |
"tinyvec",
|
2423 |
]
|
2424 |
|
2425 |
+
[[package]]
|
2426 |
+
name = "unicode-width"
|
2427 |
+
version = "0.1.11"
|
2428 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2429 |
+
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
|
2430 |
+
|
2431 |
[[package]]
|
2432 |
name = "unsafe-libyaml"
|
2433 |
version = "0.2.9"
|
|
|
2475 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2476 |
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
2477 |
|
2478 |
+
[[package]]
|
2479 |
+
name = "vec_map"
|
2480 |
+
version = "0.8.2"
|
2481 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2482 |
+
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
2483 |
+
|
2484 |
[[package]]
|
2485 |
name = "version_check"
|
2486 |
version = "0.9.4"
|
|
|
2508 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2509 |
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
2510 |
|
2511 |
+
[[package]]
|
2512 |
+
name = "which"
|
2513 |
+
version = "3.1.1"
|
2514 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2515 |
+
checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
|
2516 |
+
dependencies = [
|
2517 |
+
"libc",
|
2518 |
+
]
|
2519 |
+
|
2520 |
[[package]]
|
2521 |
name = "which"
|
2522 |
version = "4.4.2"
|
|
|
2542 |
version = "0.7.3"
|
2543 |
source = "git+https://github.com/mingyang91/whisper-rs.git#f8e424a19b13cc348395afd862f0dcb864fcb1fc"
|
2544 |
dependencies = [
|
2545 |
+
"bindgen 0.68.1",
|
2546 |
"cfg-if",
|
2547 |
"cmake",
|
2548 |
"fs_extra",
|
|
|
2564 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2565 |
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
2566 |
|
2567 |
+
[[package]]
|
2568 |
+
name = "winapi-util"
|
2569 |
+
version = "0.1.6"
|
2570 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2571 |
+
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
|
2572 |
+
dependencies = [
|
2573 |
+
"winapi",
|
2574 |
+
]
|
2575 |
+
|
2576 |
[[package]]
|
2577 |
name = "winapi-x86_64-pc-windows-gnu"
|
2578 |
version = "0.4.0"
|
Cargo.toml
CHANGED
@@ -18,8 +18,9 @@ serde_json = "1.0"
|
|
18 |
serde_yaml = "0.9"
|
19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync", "signal"] }
|
20 |
tokio-stream = "0.1"
|
21 |
-
tracing = "0.1"
|
22 |
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
|
23 |
|
24 |
[dependencies.poem]
|
25 |
version = "1.3"
|
@@ -27,6 +28,11 @@ features = ["websocket", "static-files"]
|
|
27 |
|
28 |
[dependencies.whisper-rs]
|
29 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
|
|
30 |
[dependencies.whisper-rs-sys]
|
31 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
32 |
package = "whisper-rs-sys"
|
|
|
|
|
|
|
|
|
|
18 |
serde_yaml = "0.9"
|
19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync", "signal"] }
|
20 |
tokio-stream = "0.1"
|
21 |
+
tracing = { version = "0.1", features = [] }
|
22 |
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
23 |
+
fvad = "0.1"
|
24 |
|
25 |
[dependencies.poem]
|
26 |
version = "1.3"
|
|
|
28 |
|
29 |
[dependencies.whisper-rs]
|
30 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
31 |
+
features = ["coreml", "metal"]
|
32 |
[dependencies.whisper-rs-sys]
|
33 |
git = "https://github.com/mingyang91/whisper-rs.git"
|
34 |
package = "whisper-rs-sys"
|
35 |
+
|
36 |
+
[dev-dependencies]
|
37 |
+
hound = "3.5.1"
|
38 |
+
tracing-test = "*"
|
config/dev.yaml
CHANGED
@@ -2,24 +2,32 @@ server:
|
|
2 |
port: 8080
|
3 |
host: 0.0.0.0
|
4 |
whisper:
|
5 |
-
length_ms:
|
6 |
keep_ms: 200
|
7 |
step_ms: 5000
|
8 |
-
model: "models/ggml-large-
|
9 |
-
|
|
|
|
|
|
|
10 |
params:
|
11 |
-
|
12 |
-
max_tokens: 0
|
13 |
-
audio_ctx: 0
|
14 |
speed_up: false
|
15 |
-
single_segment:
|
16 |
translate: false
|
17 |
-
|
18 |
-
temperature_inc:
|
|
|
|
|
|
|
19 |
print_special: false
|
20 |
print_progress: false
|
21 |
print_realtime: false
|
|
|
22 |
no_context: false
|
23 |
no_timestamps: false
|
|
|
24 |
tinydiarize: false
|
25 |
language: "en"
|
|
|
2 |
port: 8080
|
3 |
host: 0.0.0.0
|
4 |
whisper:
|
5 |
+
length_ms: 5000
|
6 |
keep_ms: 200
|
7 |
step_ms: 5000
|
8 |
+
model: "models/ggml-large-v3.bin"
|
9 |
+
# model: "models/ggml-base.bin"
|
10 |
+
# model: "models/ggml-medium.en.bin"
|
11 |
+
max_prompt_tokens: 32
|
12 |
+
context_confidence_threshold: 0.5
|
13 |
params:
|
14 |
+
# n_threads: 8
|
15 |
+
# max_tokens: 0
|
16 |
+
# audio_ctx: 0
|
17 |
speed_up: false
|
18 |
+
single_segment: true
|
19 |
translate: false
|
20 |
+
# temperature_inc: 0.2 #0.4
|
21 |
+
# temperature_inc: 0
|
22 |
+
# entropy_threshold: 2.5
|
23 |
+
# entropy_threshold: 2.8
|
24 |
+
# n_max_text_ctx: 64 #16384
|
25 |
print_special: false
|
26 |
print_progress: false
|
27 |
print_realtime: false
|
28 |
+
token_timestamps: false
|
29 |
no_context: false
|
30 |
no_timestamps: false
|
31 |
+
suppress_non_speech_tokens: false
|
32 |
tinydiarize: false
|
33 |
language: "en"
|
ggml-metal.metal
ADDED
The diff for this file is too large to render.
See raw diff
|
|
src/config.rs
CHANGED
@@ -3,7 +3,7 @@ use std::{env, ffi::c_int, net::IpAddr};
|
|
3 |
use config::{Config, Environment, File};
|
4 |
use once_cell::sync::Lazy;
|
5 |
use serde::Deserialize;
|
6 |
-
use whisper_rs::FullParams;
|
7 |
use tracing::debug;
|
8 |
|
9 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
@@ -12,28 +12,33 @@ pub(crate) static SETTINGS: Lazy<Settings> =
|
|
12 |
#[derive(Debug, Deserialize, Clone)]
|
13 |
pub(crate) struct WhisperConfig {
|
14 |
pub(crate) params: WhisperParams,
|
15 |
-
pub(crate) step_ms:
|
16 |
-
pub(crate) length_ms:
|
17 |
-
pub(crate) keep_ms:
|
18 |
pub(crate) model: String,
|
19 |
pub(crate) max_prompt_tokens: usize,
|
|
|
20 |
}
|
21 |
|
22 |
#[allow(dead_code)]
|
23 |
#[derive(Debug, Deserialize, Clone)]
|
24 |
pub(crate) struct WhisperParams {
|
25 |
pub(crate) n_threads: Option<usize>,
|
26 |
-
pub(crate) max_tokens: u32
|
27 |
-
pub(crate) audio_ctx: u32
|
28 |
-
pub(crate) speed_up: bool
|
29 |
-
pub(crate) translate: bool
|
30 |
-
pub(crate)
|
31 |
-
pub(crate) print_special: bool
|
32 |
-
pub(crate) print_realtime: bool
|
33 |
-
pub(crate) print_progress: bool
|
34 |
-
pub(crate)
|
35 |
-
pub(crate)
|
36 |
-
pub(crate)
|
|
|
|
|
|
|
|
|
37 |
// pub(crate) tinydiarize: bool,
|
38 |
pub(crate) language: Option<String>,
|
39 |
}
|
@@ -41,25 +46,58 @@ pub(crate) struct WhisperParams {
|
|
41 |
impl WhisperParams {
|
42 |
pub(crate) fn to_full_params<'a, 'b>(&'a self, tokens: &'b [c_int]) -> FullParams<'a, 'b> {
|
43 |
let mut param = FullParams::new(Default::default());
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
let
|
54 |
-
.
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
// param.set_tdrz_enable(self.tinydiarize);
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
|
|
63 |
param
|
64 |
}
|
65 |
}
|
|
|
3 |
use config::{Config, Environment, File};
|
4 |
use once_cell::sync::Lazy;
|
5 |
use serde::Deserialize;
|
6 |
+
use whisper_rs::{FullParams};
|
7 |
use tracing::debug;
|
8 |
|
9 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
|
|
12 |
#[derive(Debug, Deserialize, Clone)]
|
13 |
pub(crate) struct WhisperConfig {
|
14 |
pub(crate) params: WhisperParams,
|
15 |
+
pub(crate) step_ms: usize,
|
16 |
+
pub(crate) length_ms: usize,
|
17 |
+
pub(crate) keep_ms: usize,
|
18 |
pub(crate) model: String,
|
19 |
pub(crate) max_prompt_tokens: usize,
|
20 |
+
pub(crate) context_confidence_threshold: f32,
|
21 |
}
|
22 |
|
23 |
#[allow(dead_code)]
|
24 |
#[derive(Debug, Deserialize, Clone)]
|
25 |
pub(crate) struct WhisperParams {
|
26 |
pub(crate) n_threads: Option<usize>,
|
27 |
+
pub(crate) max_tokens: Option<u32>,
|
28 |
+
pub(crate) audio_ctx: Option<u32>,
|
29 |
+
pub(crate) speed_up: Option<bool>,
|
30 |
+
pub(crate) translate: Option<bool>,
|
31 |
+
pub(crate) no_context: Option<bool>,
|
32 |
+
pub(crate) print_special: Option<bool>,
|
33 |
+
pub(crate) print_realtime: Option<bool>,
|
34 |
+
pub(crate) print_progress: Option<bool>,
|
35 |
+
pub(crate) token_timestamps: Option<bool>,
|
36 |
+
pub(crate) no_timestamps: Option<bool>,
|
37 |
+
pub(crate) temperature_inc: Option<f32>,
|
38 |
+
pub(crate) entropy_threshold: Option<f32>,
|
39 |
+
pub(crate) single_segment: Option<bool>,
|
40 |
+
pub(crate) suppress_non_speech_tokens: Option<bool>,
|
41 |
+
pub(crate) n_max_text_ctx: Option<usize>,
|
42 |
// pub(crate) tinydiarize: bool,
|
43 |
pub(crate) language: Option<String>,
|
44 |
}
|
|
|
46 |
impl WhisperParams {
|
47 |
pub(crate) fn to_full_params<'a, 'b>(&'a self, tokens: &'b [c_int]) -> FullParams<'a, 'b> {
|
48 |
let mut param = FullParams::new(Default::default());
|
49 |
+
if let Some(print_progress) = self.print_progress.as_ref() {
|
50 |
+
param.set_print_progress(*print_progress);
|
51 |
+
}
|
52 |
+
if let Some(print_special) = self.print_special.as_ref() {
|
53 |
+
param.set_print_special(*print_special);
|
54 |
+
}
|
55 |
+
if let Some(print_realtime) = self.print_realtime.as_ref() {
|
56 |
+
param.set_print_realtime(*print_realtime);
|
57 |
+
}
|
58 |
+
if let Some(single_segment) = self.single_segment.as_ref() {
|
59 |
+
param.set_single_segment(*single_segment);
|
60 |
+
}
|
61 |
+
if let Some(no_timestamps) = self.no_timestamps.as_ref() {
|
62 |
+
param.set_print_timestamps(!no_timestamps);
|
63 |
+
}
|
64 |
+
if let Some(token_timestamps) = self.token_timestamps.as_ref() {
|
65 |
+
param.set_token_timestamps(*token_timestamps);
|
66 |
+
}
|
67 |
+
if let Some(translate) = self.translate.as_ref() {
|
68 |
+
param.set_translate(*translate);
|
69 |
+
}
|
70 |
+
if let Some(max_tokens) = self.max_tokens.as_ref() {
|
71 |
+
param.set_max_tokens(*max_tokens as i32);
|
72 |
+
}
|
73 |
+
param.set_language(self.language.as_deref());
|
74 |
+
if let Some(n_threads) = self.n_threads.as_ref() {
|
75 |
+
param.set_n_threads(*n_threads as i32);
|
76 |
+
}
|
77 |
+
if let Some(audio_ctx) = self.audio_ctx.as_ref() {
|
78 |
+
param.set_audio_ctx(*audio_ctx as i32);
|
79 |
+
}
|
80 |
+
if let Some(speed_up) = self.speed_up.as_ref() {
|
81 |
+
param.set_speed_up(*speed_up);
|
82 |
+
}
|
83 |
// param.set_tdrz_enable(self.tinydiarize);
|
84 |
+
if let Some(temperature_inc) = self.temperature_inc.as_ref() {
|
85 |
+
param.set_temperature_inc(*temperature_inc);
|
86 |
+
}
|
87 |
+
if let Some(suppress_non_speech_tokens) = self.suppress_non_speech_tokens.as_ref() {
|
88 |
+
param.set_suppress_non_speech_tokens(*suppress_non_speech_tokens);
|
89 |
+
}
|
90 |
+
if let Some(no_context) = self.no_context.as_ref() {
|
91 |
+
param.set_no_context(*no_context);
|
92 |
+
}
|
93 |
+
if let Some(entropy_threshold) = self.entropy_threshold.as_ref() {
|
94 |
+
param.set_entropy_thold(*entropy_threshold);
|
95 |
+
}
|
96 |
+
if let Some(n_max_text_ctx) = self.n_max_text_ctx.as_ref() {
|
97 |
+
param.set_n_max_text_ctx(*n_max_text_ctx as i32);
|
98 |
+
}
|
99 |
|
100 |
+
param.set_tokens(tokens);
|
101 |
param
|
102 |
}
|
103 |
}
|
src/main.rs
CHANGED
@@ -104,7 +104,7 @@ async fn stream_speaker(
|
|
104 |
ws.on_upgrade(|mut socket| async move {
|
105 |
let _origin_tx = lesson.voice_channel();
|
106 |
let mut transcribe_rx = lesson.transcript_channel();
|
107 |
-
let whisper = WhisperHandler::new(SETTINGS.whisper.clone(), prompt)
|
108 |
.expect("failed to create whisper");
|
109 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
110 |
loop {
|
@@ -118,7 +118,7 @@ async fn stream_speaker(
|
|
118 |
msg = socket.next() => {
|
119 |
match msg.as_ref() {
|
120 |
Some(Ok(Message::Binary(bin))) => {
|
121 |
-
let _ = whisper.
|
122 |
// if let Err(e) = origin_tx.send(bin.to_vec()).await {
|
123 |
// tracing::warn!("failed to send voice: {}", e);
|
124 |
// break;
|
@@ -173,7 +173,7 @@ async fn stream_listener(
|
|
173 |
ws: WebSocket,
|
174 |
) -> impl IntoResponse {
|
175 |
let lesson_opt = ctx.lessons_manager.get_lesson(query.id).await;
|
176 |
-
|
177 |
|
178 |
ws.on_upgrade(|mut socket| async move {
|
179 |
let voice_id = match query.voice.parse() {
|
|
|
104 |
ws.on_upgrade(|mut socket| async move {
|
105 |
let _origin_tx = lesson.voice_channel();
|
106 |
let mut transcribe_rx = lesson.transcript_channel();
|
107 |
+
let mut whisper = WhisperHandler::new(SETTINGS.whisper.clone(), prompt)
|
108 |
.expect("failed to create whisper");
|
109 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
110 |
loop {
|
|
|
118 |
msg = socket.next() => {
|
119 |
match msg.as_ref() {
|
120 |
Some(Ok(Message::Binary(bin))) => {
|
121 |
+
let _ = whisper.send_bytes(bin.to_vec()).await; // whisper test
|
122 |
// if let Err(e) = origin_tx.send(bin.to_vec()).await {
|
123 |
// tracing::warn!("failed to send voice: {}", e);
|
124 |
// break;
|
|
|
173 |
ws: WebSocket,
|
174 |
) -> impl IntoResponse {
|
175 |
let lesson_opt = ctx.lessons_manager.get_lesson(query.id).await;
|
176 |
+
debug!("listener param = {:?}", query);
|
177 |
|
178 |
ws.on_upgrade(|mut socket| async move {
|
179 |
let voice_id = match query.voice.parse() {
|
src/whisper.rs
CHANGED
@@ -1,24 +1,26 @@
|
|
1 |
use std::{
|
2 |
collections::VecDeque,
|
3 |
-
ffi::c_int,
|
4 |
fmt::{Debug, Display, Formatter},
|
5 |
thread::sleep,
|
6 |
time::Duration,
|
7 |
};
|
|
|
8 |
|
9 |
use once_cell::sync::Lazy;
|
10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
11 |
-
use
|
12 |
-
use
|
13 |
-
use
|
14 |
|
15 |
use crate::config::{Settings, SETTINGS};
|
16 |
use crate::{config::WhisperConfig, group::GroupedWithin};
|
17 |
|
|
|
|
|
18 |
static WHISPER_CONTEXT: Lazy<WhisperContext> = Lazy::new(|| {
|
19 |
let settings = Settings::new().expect("Failed to initialize settings.");
|
20 |
if tracing::enabled!(tracing::Level::DEBUG) {
|
21 |
-
let info = print_system_info();
|
22 |
debug!("system_info: n_threads = {} / {} | {}\n",
|
23 |
settings.whisper.params.n_threads.unwrap_or(0),
|
24 |
std::thread::available_parallelism().map(|c| c.get()).unwrap_or(0),
|
@@ -27,13 +29,6 @@ static WHISPER_CONTEXT: Lazy<WhisperContext> = Lazy::new(|| {
|
|
27 |
WhisperContext::new(&settings.whisper.model).expect("failed to create WhisperContext")
|
28 |
});
|
29 |
|
30 |
-
fn print_system_info() -> String {
|
31 |
-
unsafe {
|
32 |
-
let raw_info = whisper_rs_sys::whisper_print_system_info();
|
33 |
-
let info = std::ffi::CStr::from_ptr(raw_info);
|
34 |
-
info.to_str().unwrap_or("failed to get system info").to_string()
|
35 |
-
}
|
36 |
-
}
|
37 |
|
38 |
#[derive(Debug)]
|
39 |
pub(crate) enum Error {
|
@@ -70,16 +65,21 @@ impl std::error::Error for Error {
|
|
70 |
}
|
71 |
}
|
72 |
|
73 |
-
fn
|
74 |
-
|
75 |
.chunks_exact(2)
|
76 |
.map(|chunk| {
|
77 |
let mut buf = [0u8; 2];
|
78 |
buf.copy_from_slice(chunk);
|
79 |
i16::from_le_bytes(buf)
|
80 |
})
|
81 |
-
.collect::<Vec<i16>>()
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
83 |
}
|
84 |
|
85 |
#[derive(Clone, Debug)]
|
@@ -87,20 +87,21 @@ pub struct Segment {
|
|
87 |
pub start_timestamp: i64,
|
88 |
pub end_timestamp: i64,
|
89 |
pub text: String,
|
90 |
-
tokens: Vec<
|
91 |
}
|
92 |
|
93 |
pub struct WhisperHandler {
|
94 |
-
tx: mpsc::Sender<Vec<
|
95 |
-
transcription_tx: broadcast::Sender<Vec<
|
96 |
stop_handle: Option<oneshot::Sender<()>>,
|
97 |
}
|
98 |
|
99 |
impl WhisperHandler {
|
100 |
pub(crate) fn new(config: WhisperConfig, prompt: String) -> Result<Self, Error> {
|
|
|
101 |
let (stop_handle, mut stop_signal) = oneshot::channel();
|
102 |
-
let (pcm_tx, pcm_rx) = mpsc::channel::<Vec<
|
103 |
-
let (transcription_tx, _) = broadcast::channel::<Vec<
|
104 |
let shared_transcription_tx = transcription_tx.clone();
|
105 |
let state = WHISPER_CONTEXT
|
106 |
.create_state()
|
@@ -109,21 +110,46 @@ impl WhisperHandler {
|
|
109 |
.tokenize(prompt.as_str(), SETTINGS.whisper.max_prompt_tokens)
|
110 |
.map_err(|e| Error::whisper_error("failed to tokenize prompt", e))?;
|
111 |
tokio::task::spawn_blocking(move || {
|
|
|
|
|
112 |
let mut detector = Detector::new(state, &SETTINGS.whisper, preset_prompt_tokens);
|
113 |
let mut grouped = GroupedWithin::new(
|
114 |
-
detector.n_samples_step
|
115 |
Duration::from_millis(config.step_ms as u64),
|
116 |
pcm_rx,
|
117 |
u16::MAX as usize,
|
118 |
);
|
119 |
while let Err(oneshot::error::TryRecvError::Empty) = stop_signal.try_recv() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
let new_pcm_f32 = match grouped.next() {
|
121 |
Err(mpsc::error::TryRecvError::Disconnected) => break,
|
122 |
Err(mpsc::error::TryRecvError::Empty) => {
|
123 |
sleep(Duration::from_millis(10));
|
124 |
continue;
|
125 |
}
|
126 |
-
Ok(data) =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
};
|
128 |
|
129 |
detector.feed(new_pcm_f32);
|
@@ -135,26 +161,22 @@ impl WhisperHandler {
|
|
135 |
result
|
136 |
}
|
137 |
Err(err) => {
|
138 |
-
|
139 |
continue;
|
140 |
}
|
141 |
};
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
segment.text
|
149 |
-
);
|
150 |
-
}
|
151 |
-
|
152 |
-
if let Err(e) = shared_transcription_tx.send(segments) {
|
153 |
tracing::error!("failed to send transcription: {}", e);
|
154 |
break;
|
155 |
};
|
156 |
}
|
157 |
});
|
|
|
158 |
Ok(Self {
|
159 |
tx: pcm_tx,
|
160 |
transcription_tx,
|
@@ -162,27 +184,34 @@ impl WhisperHandler {
|
|
162 |
})
|
163 |
}
|
164 |
|
165 |
-
pub fn subscribe(&self) -> broadcast::Receiver<Vec<
|
166 |
self.transcription_tx.subscribe()
|
167 |
}
|
168 |
|
169 |
-
pub async fn
|
170 |
self.tx.send(data).await
|
171 |
}
|
|
|
|
|
|
|
|
|
|
|
172 |
}
|
173 |
|
174 |
#[allow(dead_code)]
|
175 |
struct Detector {
|
176 |
state: WhisperState<'static>,
|
177 |
config: &'static WhisperConfig,
|
|
|
|
|
|
|
178 |
preset_prompt_tokens: Vec<WhisperToken>,
|
179 |
n_samples_keep: usize,
|
180 |
n_samples_step: usize,
|
181 |
n_samples_len: usize,
|
182 |
-
prompt_tokens: Vec<
|
183 |
pcm_f32: VecDeque<f32>,
|
184 |
offset: usize,
|
185 |
-
stable_offset: usize,
|
186 |
}
|
187 |
|
188 |
impl Detector {
|
@@ -194,14 +223,16 @@ impl Detector {
|
|
194 |
Detector {
|
195 |
state,
|
196 |
config,
|
|
|
|
|
|
|
197 |
preset_prompt_tokens,
|
198 |
-
n_samples_keep:
|
199 |
-
n_samples_step:
|
200 |
-
n_samples_len:
|
201 |
prompt_tokens: Default::default(),
|
202 |
-
pcm_f32: VecDeque::
|
203 |
offset: 0,
|
204 |
-
stable_offset: 0,
|
205 |
}
|
206 |
}
|
207 |
|
@@ -210,20 +241,16 @@ impl Detector {
|
|
210 |
if self.pcm_f32.len() < self.n_samples_len {
|
211 |
return;
|
212 |
}
|
213 |
-
let len_to_drain = self
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
|
|
218 |
}
|
219 |
|
220 |
fn inference(&mut self) -> Result<Vec<Segment>, Error> {
|
221 |
-
let
|
222 |
-
self.preset_prompt_tokens.as_slice(),
|
223 |
-
self.prompt_tokens.as_slice(),
|
224 |
-
]
|
225 |
-
.concat();
|
226 |
-
let params = self.config.params.to_full_params(prompt_tokens.as_slice());
|
227 |
let start = std::time::Instant::now();
|
228 |
let _ = self
|
229 |
.state
|
@@ -231,35 +258,32 @@ impl Detector {
|
|
231 |
.map_err(|e| Error::whisper_error("failed to initialize WhisperState", e))?;
|
232 |
let end = std::time::Instant::now();
|
233 |
if end - start > Duration::from_millis(self.config.step_ms as u64) {
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
);
|
239 |
}
|
240 |
|
241 |
-
let timestamp_offset: i64 = (self.offset * 1000 / WHISPER_SAMPLE_RATE
|
242 |
-
let stable_offset: i64 = (self.stable_offset * 1000 / WHISPER_SAMPLE_RATE as usize) as i64;
|
243 |
let num_segments = self
|
244 |
.state
|
245 |
.full_n_segments()
|
246 |
.map_err(|e| Error::whisper_error("failed to get number of segments", e))?;
|
247 |
let mut segments: Vec<Segment> = Vec::with_capacity(num_segments as usize);
|
248 |
for i in 0..num_segments {
|
249 |
-
let end_timestamp: i64 = timestamp_offset
|
250 |
-
+ 10 * self
|
251 |
-
.state
|
252 |
-
.full_get_segment_t1(i)
|
253 |
-
.map_err(|e| Error::whisper_error("failed to get end timestamp", e))?;
|
254 |
-
if end_timestamp <= stable_offset {
|
255 |
-
continue;
|
256 |
-
}
|
257 |
-
|
258 |
let start_timestamp: i64 = timestamp_offset
|
259 |
+ 10 * self
|
260 |
.state
|
261 |
.full_get_segment_t0(i)
|
262 |
.map_err(|e| Error::whisper_error("failed to get start timestamp", e))?;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
let segment = self
|
264 |
.state
|
265 |
.full_get_segment_text(i)
|
@@ -270,11 +294,9 @@ impl Detector {
|
|
270 |
.map_err(|e| Error::whisper_error("failed to get segment tokens", e))?;
|
271 |
let mut segment_tokens = Vec::with_capacity(num_tokens as usize);
|
272 |
for j in 0..num_tokens {
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
.map_err(|e| Error::whisper_error("failed to get token", e))?,
|
277 |
-
);
|
278 |
}
|
279 |
|
280 |
segments.push(Segment {
|
@@ -285,52 +307,132 @@ impl Detector {
|
|
285 |
});
|
286 |
}
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
|
292 |
-
|
293 |
-
|
|
|
294 |
};
|
295 |
|
296 |
-
let
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
}
|
302 |
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
};
|
307 |
-
let drop_offset: usize =
|
308 |
-
last.end_timestamp as usize / 1000 * WHISPER_SAMPLE_RATE as usize - self.offset;
|
309 |
-
if drop_offset > self.pcm_f32.len() {
|
310 |
-
return; // Arithmetic overflow
|
311 |
}
|
312 |
-
|
313 |
-
self.offset += len_to_drain;
|
314 |
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
}
|
|
|
|
|
|
|
|
|
324 |
}
|
325 |
}
|
326 |
|
327 |
impl Drop for WhisperHandler {
|
328 |
fn drop(&mut self) {
|
329 |
let Some(stop_handle) = self.stop_handle.take() else {
|
330 |
-
return
|
331 |
};
|
332 |
if stop_handle.send(()).is_err() {
|
333 |
-
|
334 |
}
|
335 |
}
|
336 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
use std::{
|
2 |
collections::VecDeque,
|
|
|
3 |
fmt::{Debug, Display, Formatter},
|
4 |
thread::sleep,
|
5 |
time::Duration,
|
6 |
};
|
7 |
+
use fvad::SampleRate;
|
8 |
|
9 |
use once_cell::sync::Lazy;
|
10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
11 |
+
use tokio::time::Instant;
|
12 |
+
use tracing::{debug, trace, warn};
|
13 |
+
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperState, WhisperToken, WhisperTokenData};
|
14 |
|
15 |
use crate::config::{Settings, SETTINGS};
|
16 |
use crate::{config::WhisperConfig, group::GroupedWithin};
|
17 |
|
18 |
+
/// `whisper_rs_sys::WHISPER_SAMPLE_RATE` as a `usize`, so sample-count arithmetic in
/// this module does not need a cast at every use.
const WHISPER_SAMPLE_RATE: usize = whisper_rs_sys::WHISPER_SAMPLE_RATE as usize;
|
19 |
+
|
20 |
static WHISPER_CONTEXT: Lazy<WhisperContext> = Lazy::new(|| {
|
21 |
let settings = Settings::new().expect("Failed to initialize settings.");
|
22 |
if tracing::enabled!(tracing::Level::DEBUG) {
|
23 |
+
let info = whisper_rs::print_system_info();
|
24 |
debug!("system_info: n_threads = {} / {} | {}\n",
|
25 |
settings.whisper.params.n_threads.unwrap_or(0),
|
26 |
std::thread::available_parallelism().map(|c| c.get()).unwrap_or(0),
|
|
|
29 |
WhisperContext::new(&settings.whisper.model).expect("failed to create WhisperContext")
|
30 |
});
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
#[derive(Debug)]
|
34 |
pub(crate) enum Error {
|
|
|
65 |
}
|
66 |
}
|
67 |
|
68 |
+
/// Decodes raw little-endian 16-bit PCM bytes into `i16` samples.
///
/// Bytes are consumed two at a time. A trailing odd byte is an incomplete sample and
/// is ignored, so the result always holds `input.len() / 2` samples.
fn u8_to_i16(input: &[u8]) -> Vec<i16> {
    input
        .chunks_exact(2)
        // `chunks_exact(2)` guarantees both indices exist.
        .map(|pair| i16::from_le_bytes([pair[0], pair[1]]))
        .collect()
}
|
78 |
+
|
79 |
+
#[derive(Clone, Debug)]
|
80 |
+
pub enum Output {
|
81 |
+
Unstable(Segment),
|
82 |
+
Stable(Segment),
|
83 |
}
|
84 |
|
85 |
#[derive(Clone, Debug)]
|
|
|
87 |
pub start_timestamp: i64,
|
88 |
pub end_timestamp: i64,
|
89 |
pub text: String,
|
90 |
+
tokens: Vec<WhisperTokenData>,
|
91 |
}
|
92 |
|
93 |
pub struct WhisperHandler {
|
94 |
+
tx: mpsc::Sender<Vec<i16>>,
|
95 |
+
transcription_tx: broadcast::Sender<Vec<Output>>,
|
96 |
stop_handle: Option<oneshot::Sender<()>>,
|
97 |
}
|
98 |
|
99 |
impl WhisperHandler {
|
100 |
pub(crate) fn new(config: WhisperConfig, prompt: String) -> Result<Self, Error> {
|
101 |
+
let vad_slice_size = WHISPER_SAMPLE_RATE / 100 * 3;
|
102 |
let (stop_handle, mut stop_signal) = oneshot::channel();
|
103 |
+
let (pcm_tx, pcm_rx) = mpsc::channel::<Vec<i16>>(128);
|
104 |
+
let (transcription_tx, _) = broadcast::channel::<Vec<Output>>(128);
|
105 |
let shared_transcription_tx = transcription_tx.clone();
|
106 |
let state = WHISPER_CONTEXT
|
107 |
.create_state()
|
|
|
110 |
.tokenize(prompt.as_str(), SETTINGS.whisper.max_prompt_tokens)
|
111 |
.map_err(|e| Error::whisper_error("failed to tokenize prompt", e))?;
|
112 |
tokio::task::spawn_blocking(move || {
|
113 |
+
let mut vad = fvad::Fvad::new().expect("failed to create VAD")
|
114 |
+
.set_sample_rate(SampleRate::Rate16kHz);
|
115 |
let mut detector = Detector::new(state, &SETTINGS.whisper, preset_prompt_tokens);
|
116 |
let mut grouped = GroupedWithin::new(
|
117 |
+
detector.n_samples_step,
|
118 |
Duration::from_millis(config.step_ms as u64),
|
119 |
pcm_rx,
|
120 |
u16::MAX as usize,
|
121 |
);
|
122 |
while let Err(oneshot::error::TryRecvError::Empty) = stop_signal.try_recv() {
|
123 |
+
if detector.has_crossed_next_line() {
|
124 |
+
if let Some(segment) = detector.next_line() {
|
125 |
+
let segments = vec![Output::Stable(segment)];
|
126 |
+
if let Err(e) = shared_transcription_tx.send(segments) {
|
127 |
+
tracing::error!("failed to send transcription: {}", e);
|
128 |
+
break;
|
129 |
+
};
|
130 |
+
}
|
131 |
+
}
|
132 |
let new_pcm_f32 = match grouped.next() {
|
133 |
Err(mpsc::error::TryRecvError::Disconnected) => break,
|
134 |
Err(mpsc::error::TryRecvError::Empty) => {
|
135 |
sleep(Duration::from_millis(10));
|
136 |
continue;
|
137 |
}
|
138 |
+
Ok(data) => {
|
139 |
+
let active_voice = data
|
140 |
+
.chunks(vad_slice_size)
|
141 |
+
.filter(|frame| {
|
142 |
+
if frame.len() != vad_slice_size {
|
143 |
+
true
|
144 |
+
} else {
|
145 |
+
vad.is_voice_frame(frame).unwrap_or(true)
|
146 |
+
}
|
147 |
+
// true
|
148 |
+
})
|
149 |
+
.collect::<Vec<_>>()
|
150 |
+
.concat();
|
151 |
+
convert_integer_to_float_audio(&active_voice)
|
152 |
+
},
|
153 |
};
|
154 |
|
155 |
detector.feed(new_pcm_f32);
|
|
|
161 |
result
|
162 |
}
|
163 |
Err(err) => {
|
164 |
+
warn!("failed to inference: {}", err);
|
165 |
continue;
|
166 |
}
|
167 |
};
|
168 |
|
169 |
+
let outputs = segments
|
170 |
+
.iter()
|
171 |
+
.map(|segment| Output::Unstable(segment.clone()))
|
172 |
+
.collect::<Vec<_>>();
|
173 |
+
if let Err(e) = shared_transcription_tx.send(outputs) {
|
|
|
|
|
|
|
|
|
|
|
174 |
tracing::error!("failed to send transcription: {}", e);
|
175 |
break;
|
176 |
};
|
177 |
}
|
178 |
});
|
179 |
+
|
180 |
Ok(Self {
|
181 |
tx: pcm_tx,
|
182 |
transcription_tx,
|
|
|
184 |
})
|
185 |
}
|
186 |
|
187 |
+
/// Creates a new receiver for the transcription broadcast channel.
///
/// Each message is one batch of [`Output`] values sent by the worker.
pub fn subscribe(&self) -> broadcast::Receiver<Vec<Output>> {
    self.transcription_tx.subscribe()
}
|
190 |
|
191 |
+
pub async fn send_i16(&mut self, data: Vec<i16>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
192 |
self.tx.send(data).await
|
193 |
}
|
194 |
+
|
195 |
+
pub async fn send_bytes(&mut self, data: Vec<u8>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
196 |
+
let i16_data = u8_to_i16(&data);
|
197 |
+
self.send_i16(i16_data).await
|
198 |
+
}
|
199 |
}
|
200 |
|
201 |
#[allow(dead_code)]
|
202 |
struct Detector {
|
203 |
state: WhisperState<'static>,
|
204 |
config: &'static WhisperConfig,
|
205 |
+
start_time: Instant,
|
206 |
+
segment: Option<Segment>,
|
207 |
+
line_num: usize,
|
208 |
preset_prompt_tokens: Vec<WhisperToken>,
|
209 |
n_samples_keep: usize,
|
210 |
n_samples_step: usize,
|
211 |
n_samples_len: usize,
|
212 |
+
prompt_tokens: Vec<WhisperToken>,
|
213 |
pcm_f32: VecDeque<f32>,
|
214 |
offset: usize,
|
|
|
215 |
}
|
216 |
|
217 |
impl Detector {
|
|
|
223 |
Detector {
|
224 |
state,
|
225 |
config,
|
226 |
+
start_time: Instant::now(),
|
227 |
+
segment: None,
|
228 |
+
line_num: 0,
|
229 |
preset_prompt_tokens,
|
230 |
+
n_samples_keep: config.keep_ms * WHISPER_SAMPLE_RATE / 1000,
|
231 |
+
n_samples_step: config.step_ms * WHISPER_SAMPLE_RATE / 1000,
|
232 |
+
n_samples_len: config.length_ms * WHISPER_SAMPLE_RATE / 1000,
|
233 |
prompt_tokens: Default::default(),
|
234 |
+
pcm_f32: VecDeque::with_capacity(config.length_ms * WHISPER_SAMPLE_RATE / 1000),
|
235 |
offset: 0,
|
|
|
236 |
}
|
237 |
}
|
238 |
|
|
|
241 |
if self.pcm_f32.len() < self.n_samples_len {
|
242 |
return;
|
243 |
}
|
244 |
+
// let len_to_drain = self
|
245 |
+
// .pcm_f32
|
246 |
+
// .drain(0..(self.pcm_f32.len() - self.n_samples_len))
|
247 |
+
// .len();
|
248 |
+
// warn!("ASR too slow, drain {} samples", len_to_drain);
|
249 |
+
// self.offset += len_to_drain;
|
250 |
}
|
251 |
|
252 |
fn inference(&mut self) -> Result<Vec<Segment>, Error> {
|
253 |
+
let params = self.config.params.to_full_params(self.prompt_tokens.as_slice());
|
|
|
|
|
|
|
|
|
|
|
254 |
let start = std::time::Instant::now();
|
255 |
let _ = self
|
256 |
.state
|
|
|
258 |
.map_err(|e| Error::whisper_error("failed to initialize WhisperState", e))?;
|
259 |
let end = std::time::Instant::now();
|
260 |
if end - start > Duration::from_millis(self.config.step_ms as u64) {
|
261 |
+
// warn!(
|
262 |
+
// "full([{}]) took {} ms too slow",
|
263 |
+
// self.pcm_f32.len(),
|
264 |
+
// (end - start).as_millis()
|
265 |
+
// );
|
266 |
}
|
267 |
|
268 |
+
let timestamp_offset: i64 = (self.offset * 1000 / WHISPER_SAMPLE_RATE) as i64;
|
|
|
269 |
let num_segments = self
|
270 |
.state
|
271 |
.full_n_segments()
|
272 |
.map_err(|e| Error::whisper_error("failed to get number of segments", e))?;
|
273 |
let mut segments: Vec<Segment> = Vec::with_capacity(num_segments as usize);
|
274 |
for i in 0..num_segments {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
let start_timestamp: i64 = timestamp_offset
|
276 |
+ 10 * self
|
277 |
.state
|
278 |
.full_get_segment_t0(i)
|
279 |
.map_err(|e| Error::whisper_error("failed to get start timestamp", e))?;
|
280 |
+
|
281 |
+
let end_timestamp: i64 = timestamp_offset
|
282 |
+
+ 10 * self
|
283 |
+
.state
|
284 |
+
.full_get_segment_t1(i)
|
285 |
+
.map_err(|e| Error::whisper_error("failed to get end timestamp", e))?;
|
286 |
+
|
287 |
let segment = self
|
288 |
.state
|
289 |
.full_get_segment_text(i)
|
|
|
294 |
.map_err(|e| Error::whisper_error("failed to get segment tokens", e))?;
|
295 |
let mut segment_tokens = Vec::with_capacity(num_tokens as usize);
|
296 |
for j in 0..num_tokens {
|
297 |
+
let token_data = self.state.full_get_token_data(i, j)
|
298 |
+
.map_err(|e| Error::whisper_error("failed to get token data", e))?;
|
299 |
+
segment_tokens.push(token_data);
|
|
|
|
|
300 |
}
|
301 |
|
302 |
segments.push(Segment {
|
|
|
307 |
});
|
308 |
}
|
309 |
|
310 |
+
self.segment = segments.first().cloned();
|
311 |
+
Ok(segments.to_vec())
|
312 |
+
}
|
313 |
|
314 |
+
fn remember_prompt(&mut self) {
|
315 |
+
let Some(segment) = self.segment.as_ref() else {
|
316 |
+
return
|
317 |
};
|
318 |
|
319 |
+
let tokens = segment
|
320 |
+
.tokens
|
321 |
+
.iter()
|
322 |
+
.map(|td| td.tid)
|
323 |
+
.collect::<Vec<WhisperToken>>();
|
|
|
324 |
|
325 |
+
self.prompt_tokens.extend(tokens);
|
326 |
+
if self.prompt_tokens.len() > self.config.max_prompt_tokens {
|
327 |
+
let _ = self.prompt_tokens.drain(0..(self.prompt_tokens.len() - self.config.max_prompt_tokens)).len();
|
|
|
|
|
|
|
|
|
|
|
328 |
}
|
329 |
+
}
|
|
|
330 |
|
331 |
+
fn has_crossed_next_line(&self) -> bool {
|
332 |
+
let now = Instant::now();
|
333 |
+
let elapsed = now - self.start_time;
|
334 |
+
let line_number: usize = (elapsed.as_millis() / self.config.length_ms as u128) as usize;
|
335 |
+
line_number > self.line_num
|
336 |
+
}
|
337 |
+
|
338 |
+
/// Closes the current line and returns its pending segment, if any.
///
/// Trims the audio buffer, advances `offset` by the number of samples removed,
/// bumps `line_num`, folds the pending segment's tokens into the prompt and finally
/// takes the pending segment out of `self.segment`.
fn next_line(&mut self) -> Option<Segment> {
    let buffered = self.pcm_f32.len();
    let removed = if buffered > self.n_samples_keep {
        // Keep only the newest `n_samples_keep` samples.
        self.pcm_f32.drain(..buffered - self.n_samples_keep).len()
    } else {
        // NOTE(review): a buffer no longer than the keep window is discarded entirely
        // rather than retained — confirm this is intended.
        self.pcm_f32.clear();
        buffered
    };
    self.offset += removed;

    self.line_num += 1;
    self.remember_prompt();
    self.segment.take()
}
|
352 |
}
|
353 |
|
354 |
impl Drop for WhisperHandler {
|
355 |
fn drop(&mut self) {
|
356 |
let Some(stop_handle) = self.stop_handle.take() else {
|
357 |
+
return warn!("WhisperHandler::drop() called without stop_handle");
|
358 |
};
|
359 |
if stop_handle.send(()).is_err() {
|
360 |
+
warn!("WhisperHandler::drop() failed to send stop signal");
|
361 |
}
|
362 |
}
|
363 |
}
|
364 |
+
|
365 |
+
#[cfg(test)]
mod test {
    use super::*;
    use std::io::{stdout, Write};

    /// Console renderer for interactive runs: a stable line is printed on its own
    /// line, an unstable one overwrites the current console line.
    #[allow(dead_code)] // not called by the automated test; kept for manual debugging
    async fn print_output(output: Output) {
        match output {
            Output::Stable(stable) => {
                print!("\x1b[2K\r");
                println!("{}", stable.text);
            }
            Output::Unstable(unstable) => {
                // Clear the current console line before rewriting it.
                print!("\x1b[2K\r");
                print!("{}", " ".repeat(100));
                print!("\x1b[2K\r");
                print!("{} ...", unstable.text);
            }
        }
        stdout().flush().unwrap();
    }

    /// Streams a sample WAV file through a `WhisperHandler` in 1600-sample chunks,
    /// one chunk every 100 ms, and prints every stable line that comes back.
    #[tokio::test]
    #[tracing_test::traced_test]
    async fn test_whisper_handler() {
        let mut whisper_handler = WhisperHandler::new(
            SETTINGS.whisper.clone(),
            "Harry Potter and the Philosopher's Stone".to_string(),
        )
        .expect("failed to create WhisperHandler");

        let wav = hound::WavReader::open("samples/ADHD_1A.wav").expect("failed to open wav");
        let spec = wav.spec();
        println!("{:?}", spec);
        let samples = wav
            .into_samples::<i16>()
            .map(|sample| sample.expect("failed to decode wav sample"))
            .collect::<Vec<i16>>();

        let mut rx = whisper_handler.subscribe();

        // The sender owns the handler and drops it once all audio has been queued.
        // Dropping it stops the worker and closes the broadcast channel, which is what
        // lets the receive loop below finish; while the handler is alive `recv` never
        // reports the channel as closed and the test would wait forever.
        // Audio still queued at that point may not be transcribed.
        let send_fut = async move {
            for chunk in samples.chunks(1600) {
                whisper_handler
                    .send_i16(chunk.to_vec())
                    .await
                    .expect("failed to send sample");
                tokio::time::sleep(Duration::from_millis(100)).await;
            }
            drop(whisper_handler);
        };

        let recv_fut = async {
            // Ends on the first receive error (channel closed or receiver lagged).
            while let Ok(outputs) = rx.recv().await {
                if let Some(Output::Stable(stable)) = outputs.first() {
                    println!("{}", stable.text);
                }
            }
        };

        tokio::join!(send_fut, recv_fut);
    }
}
|