Spaces:
Sleeping
Sleeping
Matrix
committed on
Commit
·
697a86c
1
Parent(s):
2bb7b57
chore: remove unnecessary clap & update README.md
Browse files- Cargo.lock +0 -113
- Cargo.toml +6 -7
- README.MD +3 -4
- src/config.rs +1 -1
- src/main.rs +3 -24
- src/whisper.rs +8 -8
Cargo.lock
CHANGED
@@ -26,54 +26,6 @@ dependencies = [
|
|
26 |
"memchr",
|
27 |
]
|
28 |
|
29 |
-
[[package]]
|
30 |
-
name = "anstream"
|
31 |
-
version = "0.6.4"
|
32 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
33 |
-
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
|
34 |
-
dependencies = [
|
35 |
-
"anstyle",
|
36 |
-
"anstyle-parse",
|
37 |
-
"anstyle-query",
|
38 |
-
"anstyle-wincon",
|
39 |
-
"colorchoice",
|
40 |
-
"utf8parse",
|
41 |
-
]
|
42 |
-
|
43 |
-
[[package]]
|
44 |
-
name = "anstyle"
|
45 |
-
version = "1.0.4"
|
46 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
47 |
-
checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
|
48 |
-
|
49 |
-
[[package]]
|
50 |
-
name = "anstyle-parse"
|
51 |
-
version = "0.2.2"
|
52 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
53 |
-
checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140"
|
54 |
-
dependencies = [
|
55 |
-
"utf8parse",
|
56 |
-
]
|
57 |
-
|
58 |
-
[[package]]
|
59 |
-
name = "anstyle-query"
|
60 |
-
version = "1.0.0"
|
61 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
62 |
-
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
|
63 |
-
dependencies = [
|
64 |
-
"windows-sys",
|
65 |
-
]
|
66 |
-
|
67 |
-
[[package]]
|
68 |
-
name = "anstyle-wincon"
|
69 |
-
version = "3.0.1"
|
70 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
71 |
-
checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628"
|
72 |
-
dependencies = [
|
73 |
-
"anstyle",
|
74 |
-
"windows-sys",
|
75 |
-
]
|
76 |
-
|
77 |
[[package]]
|
78 |
name = "async-stream"
|
79 |
version = "0.3.5"
|
@@ -661,52 +613,6 @@ dependencies = [
|
|
661 |
"libloading",
|
662 |
]
|
663 |
|
664 |
-
[[package]]
|
665 |
-
name = "clap"
|
666 |
-
version = "4.4.7"
|
667 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
668 |
-
checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b"
|
669 |
-
dependencies = [
|
670 |
-
"clap_builder",
|
671 |
-
"clap_derive",
|
672 |
-
]
|
673 |
-
|
674 |
-
[[package]]
|
675 |
-
name = "clap_builder"
|
676 |
-
version = "4.4.7"
|
677 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
678 |
-
checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663"
|
679 |
-
dependencies = [
|
680 |
-
"anstream",
|
681 |
-
"anstyle",
|
682 |
-
"clap_lex",
|
683 |
-
"strsim",
|
684 |
-
]
|
685 |
-
|
686 |
-
[[package]]
|
687 |
-
name = "clap_derive"
|
688 |
-
version = "4.4.7"
|
689 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
690 |
-
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
|
691 |
-
dependencies = [
|
692 |
-
"heck",
|
693 |
-
"proc-macro2",
|
694 |
-
"quote",
|
695 |
-
"syn 2.0.39",
|
696 |
-
]
|
697 |
-
|
698 |
-
[[package]]
|
699 |
-
name = "clap_lex"
|
700 |
-
version = "0.6.0"
|
701 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
702 |
-
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
|
703 |
-
|
704 |
-
[[package]]
|
705 |
-
name = "colorchoice"
|
706 |
-
version = "1.0.0"
|
707 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
708 |
-
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
709 |
-
|
710 |
[[package]]
|
711 |
name = "core-foundation"
|
712 |
version = "0.9.3"
|
@@ -961,12 +867,6 @@ dependencies = [
|
|
961 |
"http",
|
962 |
]
|
963 |
|
964 |
-
[[package]]
|
965 |
-
name = "heck"
|
966 |
-
version = "0.4.1"
|
967 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
968 |
-
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
969 |
-
|
970 |
[[package]]
|
971 |
name = "hermit-abi"
|
972 |
version = "0.3.3"
|
@@ -1417,7 +1317,6 @@ dependencies = [
|
|
1417 |
"aws-sdk-polly",
|
1418 |
"aws-sdk-transcribestreaming",
|
1419 |
"aws-sdk-translate",
|
1420 |
-
"clap",
|
1421 |
"futures-util",
|
1422 |
"lazy_static",
|
1423 |
"poem",
|
@@ -1855,12 +1754,6 @@ version = "0.9.8"
|
|
1855 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1856 |
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
1857 |
|
1858 |
-
[[package]]
|
1859 |
-
name = "strsim"
|
1860 |
-
version = "0.10.0"
|
1861 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1862 |
-
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
1863 |
-
|
1864 |
[[package]]
|
1865 |
name = "subtle"
|
1866 |
version = "2.5.0"
|
@@ -2251,12 +2144,6 @@ version = "0.7.6"
|
|
2251 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2252 |
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
2253 |
|
2254 |
-
[[package]]
|
2255 |
-
name = "utf8parse"
|
2256 |
-
version = "0.2.1"
|
2257 |
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2258 |
-
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
2259 |
-
|
2260 |
[[package]]
|
2261 |
name = "uuid"
|
2262 |
version = "1.5.0"
|
|
|
26 |
"memchr",
|
27 |
]
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
[[package]]
|
30 |
name = "async-stream"
|
31 |
version = "0.3.5"
|
|
|
613 |
"libloading",
|
614 |
]
|
615 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
616 |
[[package]]
|
617 |
name = "core-foundation"
|
618 |
version = "0.9.3"
|
|
|
867 |
"http",
|
868 |
]
|
869 |
|
|
|
|
|
|
|
|
|
|
|
|
|
870 |
[[package]]
|
871 |
name = "hermit-abi"
|
872 |
version = "0.3.3"
|
|
|
1317 |
"aws-sdk-polly",
|
1318 |
"aws-sdk-transcribestreaming",
|
1319 |
"aws-sdk-translate",
|
|
|
1320 |
"futures-util",
|
1321 |
"lazy_static",
|
1322 |
"poem",
|
|
|
1754 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1755 |
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
1756 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1757 |
[[package]]
|
1758 |
name = "subtle"
|
1759 |
version = "2.5.0"
|
|
|
2144 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2145 |
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
2146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
2147 |
[[package]]
|
2148 |
name = "uuid"
|
2149 |
version = "1.5.0"
|
Cargo.toml
CHANGED
@@ -4,23 +4,22 @@ version = "0.1.0"
|
|
4 |
edition = "2021"
|
5 |
|
6 |
[dependencies]
|
|
|
7 |
aws-config = "0.56"
|
8 |
aws-sdk-transcribestreaming = "0.34"
|
9 |
aws-sdk-translate = "0.34"
|
10 |
aws-sdk-polly = "0.34"
|
11 |
-
clap = { version = "4.4", features = ["derive"] }
|
12 |
-
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync"] }
|
13 |
-
tokio-stream = "0.1"
|
14 |
-
async-stream = "0.3"
|
15 |
futures-util = "0.3"
|
|
|
16 |
serde = { version = "1.0", features = ["derive"] }
|
17 |
serde_json = "1.0"
|
18 |
serde_yaml = "0.9"
|
19 |
-
|
20 |
-
|
21 |
tracing = "0.1"
|
22 |
tracing-subscriber = "0.3"
|
23 |
-
|
|
|
24 |
|
25 |
[dependencies.poem]
|
26 |
version = "1.3"
|
|
|
4 |
edition = "2021"
|
5 |
|
6 |
[dependencies]
|
7 |
+
async-stream = "0.3"
|
8 |
aws-config = "0.56"
|
9 |
aws-sdk-transcribestreaming = "0.34"
|
10 |
aws-sdk-translate = "0.34"
|
11 |
aws-sdk-polly = "0.34"
|
|
|
|
|
|
|
|
|
12 |
futures-util = "0.3"
|
13 |
+
lazy_static = "1.4"
|
14 |
serde = { version = "1.0", features = ["derive"] }
|
15 |
serde_json = "1.0"
|
16 |
serde_yaml = "0.9"
|
17 |
+
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync"] }
|
18 |
+
tokio-stream = "0.1"
|
19 |
tracing = "0.1"
|
20 |
tracing-subscriber = "0.3"
|
21 |
+
whisper-rs = "0.8"
|
22 |
+
whisper-rs-sys = "0.6"
|
23 |
|
24 |
[dependencies.poem]
|
25 |
version = "1.3"
|
README.MD
CHANGED
@@ -21,10 +21,9 @@ Configuration like AWS credentials and models are specified in config.yaml.
|
|
21 |
## Getting Started
|
22 |
To run Polyhedron locally:
|
23 |
|
24 |
-
|
25 |
-
Run `cargo run`
|
26 |
-
|
27 |
-
Open http://localhost:8080 in the browser
|
28 |
## Architecture
|
29 |
|
30 |

|
|
|
21 |
## Getting Started
|
22 |
To run Polyhedron locally:
|
23 |
|
24 |
+
1. Configure your AWS account via https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html
|
25 |
+
2. Clone the repository and run `cargo run --release`
|
26 |
+
3. Open http://localhost:8080 in the browser
|
|
|
27 |
## Architecture
|
28 |
|
29 |

|
src/config.rs
CHANGED
@@ -52,7 +52,7 @@ pub(crate) struct WhisperParams {
|
|
52 |
const NONE: [c_int; 0] = [];
|
53 |
|
54 |
impl WhisperParams {
|
55 |
-
pub(crate) fn to_full_params<'a, 'b>(&'a self,
|
56 |
let mut param = FullParams::new(Default::default());
|
57 |
param.set_print_progress(self.print_progress);
|
58 |
param.set_print_special(self.print_special);
|
|
|
52 |
const NONE: [c_int; 0] = [];
|
53 |
|
54 |
impl WhisperParams {
|
55 |
+
pub(crate) fn to_full_params<'a, 'b>(&'a self, _tokens: &'b [c_int]) -> FullParams<'a, 'b> {
|
56 |
let mut param = FullParams::new(Default::default());
|
57 |
param.set_print_progress(self.print_progress);
|
58 |
param.set_print_special(self.print_special);
|
src/main.rs
CHANGED
@@ -5,11 +5,7 @@
|
|
5 |
|
6 |
#![allow(clippy::result_large_err)]
|
7 |
|
8 |
-
use
|
9 |
-
use aws_sdk_transcribestreaming::{config::Region, meta::PKG_VERSION};
|
10 |
-
use clap::Parser;
|
11 |
-
use std::default::Default;
|
12 |
-
|
13 |
use futures_util::stream::StreamExt;
|
14 |
use futures_util::SinkExt;
|
15 |
use poem::endpoint::{StaticFileEndpoint, StaticFilesEndpoint};
|
@@ -29,13 +25,6 @@ mod group;
|
|
29 |
mod lesson;
|
30 |
mod whisper;
|
31 |
|
32 |
-
#[derive(Debug, Parser)]
|
33 |
-
struct Opt {
|
34 |
-
/// The AWS Region.
|
35 |
-
#[structopt(short, long)]
|
36 |
-
region: Option<String>,
|
37 |
-
}
|
38 |
-
|
39 |
#[derive(Clone)]
|
40 |
struct Context {
|
41 |
lessons_manager: LessonsManager,
|
@@ -45,21 +34,11 @@ struct Context {
|
|
45 |
async fn main() -> Result<(), std::io::Error> {
|
46 |
tracing_subscriber::fmt::init();
|
47 |
|
48 |
-
let Opt { region } = Opt::parse();
|
49 |
-
|
50 |
-
let region_provider = RegionProviderChain::first_try(region.map(Region::new))
|
51 |
-
.or_default_provider()
|
52 |
-
.or_else(Region::new("us-west-2"));
|
53 |
-
|
54 |
if tracing::enabled!(tracing::Level::DEBUG) {
|
55 |
tracing::debug!("Transcribe client version: {}", PKG_VERSION);
|
56 |
-
tracing::debug!(
|
57 |
-
"Region: {}",
|
58 |
-
region_provider.region().await.unwrap().as_ref()
|
59 |
-
);
|
60 |
}
|
61 |
|
62 |
-
let shared_config = aws_config::
|
63 |
let ctx = Context {
|
64 |
lessons_manager: LessonsManager::new(&shared_config),
|
65 |
};
|
@@ -112,7 +91,7 @@ async fn stream_speaker(
|
|
112 |
let prompt = query.prompt.clone().unwrap_or_default();
|
113 |
|
114 |
ws.on_upgrade(|mut socket| async move {
|
115 |
-
let
|
116 |
let mut transcribe_rx = lesson.transcript_channel();
|
117 |
let whisper =
|
118 |
WhisperHandler::new(CONFIG.whisper.clone(), prompt).expect("failed to create whisper");
|
|
|
5 |
|
6 |
#![allow(clippy::result_large_err)]
|
7 |
|
8 |
+
use aws_sdk_transcribestreaming::{meta::PKG_VERSION};
|
|
|
|
|
|
|
|
|
9 |
use futures_util::stream::StreamExt;
|
10 |
use futures_util::SinkExt;
|
11 |
use poem::endpoint::{StaticFileEndpoint, StaticFilesEndpoint};
|
|
|
25 |
mod lesson;
|
26 |
mod whisper;
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
#[derive(Clone)]
|
29 |
struct Context {
|
30 |
lessons_manager: LessonsManager,
|
|
|
34 |
async fn main() -> Result<(), std::io::Error> {
|
35 |
tracing_subscriber::fmt::init();
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
if tracing::enabled!(tracing::Level::DEBUG) {
|
38 |
tracing::debug!("Transcribe client version: {}", PKG_VERSION);
|
|
|
|
|
|
|
|
|
39 |
}
|
40 |
|
41 |
+
let shared_config = aws_config::load_from_env().await;
|
42 |
let ctx = Context {
|
43 |
lessons_manager: LessonsManager::new(&shared_config),
|
44 |
};
|
|
|
91 |
let prompt = query.prompt.clone().unwrap_or_default();
|
92 |
|
93 |
ws.on_upgrade(|mut socket| async move {
|
94 |
+
let _origin_tx = lesson.voice_channel();
|
95 |
let mut transcribe_rx = lesson.transcript_channel();
|
96 |
let whisper =
|
97 |
WhisperHandler::new(CONFIG.whisper.clone(), prompt).expect("failed to create whisper");
|
src/whisper.rs
CHANGED
@@ -12,7 +12,7 @@ use whisper_rs_sys::WHISPER_SAMPLE_RATE;
|
|
12 |
|
13 |
lazy_static! {
|
14 |
static ref WHISPER_CONTEXT: WhisperContext =
|
15 |
-
|
16 |
}
|
17 |
|
18 |
#[derive(Debug)]
|
@@ -203,7 +203,7 @@ impl Detector {
|
|
203 |
self.preset_prompt_tokens.as_slice(),
|
204 |
self.prompt_tokens.as_slice(),
|
205 |
]
|
206 |
-
|
207 |
let params = self.config.params.to_full_params(prompt_tokens.as_slice());
|
208 |
let start = std::time::Instant::now();
|
209 |
let _ = self
|
@@ -229,18 +229,18 @@ impl Detector {
|
|
229 |
for i in 0..num_segments {
|
230 |
let end_timestamp: i64 = timestamp_offset
|
231 |
+ 10 * self
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
if end_timestamp <= stable_offset {
|
236 |
continue;
|
237 |
}
|
238 |
|
239 |
let start_timestamp: i64 = timestamp_offset
|
240 |
+ 10 * self
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
let segment = self
|
245 |
.state
|
246 |
.full_get_segment_text(i)
|
|
|
12 |
|
13 |
lazy_static! {
|
14 |
static ref WHISPER_CONTEXT: WhisperContext =
|
15 |
+
WhisperContext::new(&*CONFIG.whisper.model).expect("failed to create WhisperContext");
|
16 |
}
|
17 |
|
18 |
#[derive(Debug)]
|
|
|
203 |
self.preset_prompt_tokens.as_slice(),
|
204 |
self.prompt_tokens.as_slice(),
|
205 |
]
|
206 |
+
.concat();
|
207 |
let params = self.config.params.to_full_params(prompt_tokens.as_slice());
|
208 |
let start = std::time::Instant::now();
|
209 |
let _ = self
|
|
|
229 |
for i in 0..num_segments {
|
230 |
let end_timestamp: i64 = timestamp_offset
|
231 |
+ 10 * self
|
232 |
+
.state
|
233 |
+
.full_get_segment_t1(i)
|
234 |
+
.map_err(|e| Error::whisper_error("failed to get end timestamp", e))?;
|
235 |
if end_timestamp <= stable_offset {
|
236 |
continue;
|
237 |
}
|
238 |
|
239 |
let start_timestamp: i64 = timestamp_offset
|
240 |
+ 10 * self
|
241 |
+
.state
|
242 |
+
.full_get_segment_t0(i)
|
243 |
+
.map_err(|e| Error::whisper_error("failed to get start timestamp", e))?;
|
244 |
let segment = self
|
245 |
.state
|
246 |
.full_get_segment_text(i)
|