Spaces:
Sleeping
Sleeping
upgrade aws-sdk version
Browse files- .dockerignore +6 -0
- Cargo.lock +31 -3
- Cargo.toml +3 -2
- Dockerfile +18 -40
- README.MD +1 -1
- docker-compose.yaml +16 -0
- src/config.rs +5 -0
- src/main.rs +9 -6
- src/whisper.rs +8 -9
.dockerignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.idea/
|
2 |
+
static/
|
3 |
+
target/
|
4 |
+
docker-compose.yaml
|
5 |
+
Dockerfile
|
6 |
+
README.MD
|
Cargo.lock
CHANGED
@@ -1100,6 +1100,15 @@ version = "0.4.20"
|
|
1100 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1101 |
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
1102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1103 |
[[package]]
|
1104 |
name = "memchr"
|
1105 |
version = "2.6.4"
|
@@ -1495,8 +1504,17 @@ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
|
|
1495 |
dependencies = [
|
1496 |
"aho-corasick",
|
1497 |
"memchr",
|
1498 |
-
"regex-automata",
|
1499 |
-
"regex-syntax",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1500 |
]
|
1501 |
|
1502 |
[[package]]
|
@@ -1507,9 +1525,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
|
|
1507 |
dependencies = [
|
1508 |
"aho-corasick",
|
1509 |
"memchr",
|
1510 |
-
"regex-syntax",
|
1511 |
]
|
1512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1513 |
[[package]]
|
1514 |
name = "regex-syntax"
|
1515 |
version = "0.8.2"
|
@@ -2098,10 +2122,14 @@ version = "0.3.17"
|
|
2098 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2099 |
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
2100 |
dependencies = [
|
|
|
2101 |
"nu-ansi-term",
|
|
|
|
|
2102 |
"sharded-slab",
|
2103 |
"smallvec",
|
2104 |
"thread_local",
|
|
|
2105 |
"tracing-core",
|
2106 |
"tracing-log",
|
2107 |
]
|
|
|
1100 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1101 |
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
1102 |
|
1103 |
+
[[package]]
|
1104 |
+
name = "matchers"
|
1105 |
+
version = "0.1.0"
|
1106 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1107 |
+
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
|
1108 |
+
dependencies = [
|
1109 |
+
"regex-automata 0.1.10",
|
1110 |
+
]
|
1111 |
+
|
1112 |
[[package]]
|
1113 |
name = "memchr"
|
1114 |
version = "2.6.4"
|
|
|
1504 |
dependencies = [
|
1505 |
"aho-corasick",
|
1506 |
"memchr",
|
1507 |
+
"regex-automata 0.4.3",
|
1508 |
+
"regex-syntax 0.8.2",
|
1509 |
+
]
|
1510 |
+
|
1511 |
+
[[package]]
|
1512 |
+
name = "regex-automata"
|
1513 |
+
version = "0.1.10"
|
1514 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1515 |
+
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
1516 |
+
dependencies = [
|
1517 |
+
"regex-syntax 0.6.29",
|
1518 |
]
|
1519 |
|
1520 |
[[package]]
|
|
|
1525 |
dependencies = [
|
1526 |
"aho-corasick",
|
1527 |
"memchr",
|
1528 |
+
"regex-syntax 0.8.2",
|
1529 |
]
|
1530 |
|
1531 |
+
[[package]]
|
1532 |
+
name = "regex-syntax"
|
1533 |
+
version = "0.6.29"
|
1534 |
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1535 |
+
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
1536 |
+
|
1537 |
[[package]]
|
1538 |
name = "regex-syntax"
|
1539 |
version = "0.8.2"
|
|
|
2122 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2123 |
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
2124 |
dependencies = [
|
2125 |
+
"matchers",
|
2126 |
"nu-ansi-term",
|
2127 |
+
"once_cell",
|
2128 |
+
"regex",
|
2129 |
"sharded-slab",
|
2130 |
"smallvec",
|
2131 |
"thread_local",
|
2132 |
+
"tracing",
|
2133 |
"tracing-core",
|
2134 |
"tracing-log",
|
2135 |
]
|
Cargo.toml
CHANGED
@@ -19,7 +19,7 @@ serde_yaml = "0.9"
|
|
19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync"] }
|
20 |
tokio-stream = "0.1"
|
21 |
tracing = "0.1"
|
22 |
-
tracing-subscriber = "0.3"
|
23 |
whisper-rs = "0.8"
|
24 |
whisper-rs-sys = "0.6"
|
25 |
|
@@ -34,9 +34,10 @@ features = ["coreml"]
|
|
34 |
[target.aarch64-apple-darwin.dependencies.whisper-rs-sys]
|
35 |
version = "0.6"
|
36 |
|
37 |
-
|
38 |
[target.x86_64-unknown-linux-gnu.dependencies.whisper-rs]
|
|
|
39 |
features = ["cuda"]
|
40 |
|
41 |
[target.aarch64-unknown-linux-gnu.dependencies.whisper-rs]
|
|
|
42 |
features = ["cuda"]
|
|
|
19 |
tokio = { version = "1.33", features = ["macros", "rt-multi-thread", "sync"] }
|
20 |
tokio-stream = "0.1"
|
21 |
tracing = "0.1"
|
22 |
+
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
23 |
whisper-rs = "0.8"
|
24 |
whisper-rs-sys = "0.6"
|
25 |
|
|
|
34 |
[target.aarch64-apple-darwin.dependencies.whisper-rs-sys]
|
35 |
version = "0.6"
|
36 |
|
|
|
37 |
[target.x86_64-unknown-linux-gnu.dependencies.whisper-rs]
|
38 |
+
version = "0.8"
|
39 |
features = ["cuda"]
|
40 |
|
41 |
[target.aarch64-unknown-linux-gnu.dependencies.whisper-rs]
|
42 |
+
version = "0.8"
|
43 |
features = ["cuda"]
|
Dockerfile
CHANGED
@@ -1,46 +1,24 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
# Install python3.11 and build dependencies
|
5 |
-
RUN apt-get update
|
6 |
-
RUN apt-get install -y software-properties-common
|
7 |
-
#RUN add-apt-repository ppa:deadsnakes/ppa
|
8 |
-
|
9 |
-
RUN apt-get update
|
10 |
-
RUN apt-get install -y libssl-dev cmake python3-dev curl pkg-config clang
|
11 |
-
|
12 |
-
# install rust toolchain
|
13 |
-
RUN curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y
|
14 |
ENV PATH=/root/.cargo/bin:$PATH
|
|
|
|
|
15 |
|
16 |
-
|
17 |
-
COPY
|
18 |
-
|
19 |
-
# Download dependencies
|
20 |
-
RUN mkdir -p src/bin && echo "fn main() {println!(\"if you see this, the build broke\")}" > src/bin/bigbot.rs
|
21 |
-
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
22 |
-
--mount=type=cache,target=/usr/local/cargo/git \
|
23 |
-
cargo build --release
|
24 |
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
-
# Build the project with release profile
|
28 |
-
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
29 |
-
--mount=type=cache,target=/usr/local/cargo/git \
|
30 |
-
cargo build --release
|
31 |
-
|
32 |
-
# Runtime stage
|
33 |
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 as runtime
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
COPY --from=builder /target/release/polyhedron /usr/local/bin/polyhedron
|
40 |
-
COPY ./models/ggml-large-encoder.mlmodelc ./models/ggml-large-encoder.mlmodelc
|
41 |
-
COPY ./models/ggml-large.bin ./models/ggml-large.bin
|
42 |
-
COPY ./config/dev.yaml ./config/dev.yaml
|
43 |
-
COPY ./static ./static
|
44 |
-
|
45 |
-
# Run the binary
|
46 |
-
CMD ["polyhedron"]
|
|
|
1 |
+
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 as chef
|
2 |
+
RUN apt-get update && apt-get install -y curl
|
3 |
+
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain stable -y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
ENV PATH=/root/.cargo/bin:$PATH
|
5 |
+
RUN cargo install cargo-chef
|
6 |
+
WORKDIR /app
|
7 |
|
8 |
+
FROM chef as planner
|
9 |
+
COPY . .
|
10 |
+
RUN cargo chef prepare --recipe-path recipe.json
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
FROM chef as builder
|
13 |
+
RUN apt-get update && apt-get install -y cmake g++ libclang-dev libssl-dev pkg-config python3-dev
|
14 |
+
COPY --from=planner /app/recipe.json recipe.json
|
15 |
+
RUN cargo chef cook --release --recipe-path recipe.json
|
16 |
+
COPY . .
|
17 |
+
RUN cargo build --release
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04 as runtime
|
20 |
|
21 |
+
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/*
|
22 |
+
WORKDIR /app
|
23 |
+
COPY --from=builder /app/target/release/polyhedron .
|
24 |
+
ENTRYPOINT ["./polyhedron"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.MD
CHANGED
@@ -22,7 +22,7 @@ Configuration like AWS credentials and models are specified in config.yaml.
|
|
22 |
To run Polyhedron locally:
|
23 |
|
24 |
1. Config AWS account via https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html
|
25 |
-
2. Clone the repository, Run `
|
26 |
3. Open http://localhost:8080 in the browser
|
27 |
## Architecture
|
28 |
|
|
|
22 |
To run Polyhedron locally:
|
23 |
|
24 |
1. Config AWS account via https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html
|
25 |
+
2. Clone the repository, then run `docker compose up`
|
26 |
3. Open http://localhost:8080 in the browser
|
27 |
## Architecture
|
28 |
|
docker-compose.yaml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: "3"
|
2 |
+
services:
|
3 |
+
polyhedron:
|
4 |
+
container_name: polyhedron
|
5 |
+
build: ./
|
6 |
+
# image: vitongue/polyhedron:latest
|
7 |
+
environment:
|
8 |
+
- AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
|
9 |
+
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
10 |
+
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
11 |
+
- RUST_LOG=polyhedron=debug
|
12 |
+
volumes:
|
13 |
+
- ./config:/app/config
|
14 |
+
- ./static:/app/static
|
15 |
+
ports:
|
16 |
+
- "8080:8080"
|
src/config.rs
CHANGED
@@ -4,6 +4,7 @@ use config::{Config, Environment, File};
|
|
4 |
use once_cell::sync::Lazy;
|
5 |
use serde::Deserialize;
|
6 |
use whisper_rs::FullParams;
|
|
|
7 |
|
8 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
9 |
Lazy::new(|| Settings::new().expect("Failed to initialize settings"));
|
@@ -85,6 +86,10 @@ impl Settings {
|
|
85 |
.map_err(anyhow::Error::from)?;
|
86 |
|
87 |
config.try_deserialize::<Self>().map_err(Into::into)
|
|
|
|
|
|
|
|
|
88 |
}
|
89 |
}
|
90 |
|
|
|
4 |
use once_cell::sync::Lazy;
|
5 |
use serde::Deserialize;
|
6 |
use whisper_rs::FullParams;
|
7 |
+
use tracing::debug;
|
8 |
|
9 |
pub(crate) static SETTINGS: Lazy<Settings> =
|
10 |
Lazy::new(|| Settings::new().expect("Failed to initialize settings"));
|
|
|
86 |
.map_err(anyhow::Error::from)?;
|
87 |
|
88 |
config.try_deserialize::<Self>().map_err(Into::into)
|
89 |
+
.map(|settings| {
|
90 |
+
debug!("Settings: {settings:?}");
|
91 |
+
settings
|
92 |
+
})
|
93 |
}
|
94 |
}
|
95 |
|
src/main.rs
CHANGED
@@ -19,6 +19,8 @@ use poem::{
|
|
19 |
};
|
20 |
use serde::{Deserialize, Serialize};
|
21 |
use tokio::select;
|
|
|
|
|
22 |
|
23 |
use crate::{config::*, lesson::*, whisper::*};
|
24 |
|
@@ -34,11 +36,12 @@ struct Context {
|
|
34 |
|
35 |
#[tokio::main]
|
36 |
async fn main() -> Result<(), std::io::Error> {
|
37 |
-
tracing_subscriber::
|
|
|
|
|
|
|
38 |
|
39 |
-
|
40 |
-
tracing::debug!("Transcribe client version: {}", PKG_VERSION);
|
41 |
-
}
|
42 |
|
43 |
let shared_config = aws_config::load_from_env().await;
|
44 |
let ctx = Context {
|
@@ -95,8 +98,8 @@ async fn stream_speaker(
|
|
95 |
ws.on_upgrade(|mut socket| async move {
|
96 |
let _origin_tx = lesson.voice_channel();
|
97 |
let mut transcribe_rx = lesson.transcript_channel();
|
98 |
-
let whisper =
|
99 |
-
|
100 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
101 |
loop {
|
102 |
select! {
|
|
|
19 |
};
|
20 |
use serde::{Deserialize, Serialize};
|
21 |
use tokio::select;
|
22 |
+
use tracing::debug;
|
23 |
+
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
24 |
|
25 |
use crate::{config::*, lesson::*, whisper::*};
|
26 |
|
|
|
36 |
|
37 |
#[tokio::main]
|
38 |
async fn main() -> Result<(), std::io::Error> {
|
39 |
+
tracing_subscriber::registry()
|
40 |
+
.with(fmt::layer())
|
41 |
+
.with(EnvFilter::from_default_env())
|
42 |
+
.init();
|
43 |
|
44 |
+
debug!("Transcribe client version: {}", PKG_VERSION);
|
|
|
|
|
45 |
|
46 |
let shared_config = aws_config::load_from_env().await;
|
47 |
let ctx = Context {
|
|
|
98 |
ws.on_upgrade(|mut socket| async move {
|
99 |
let _origin_tx = lesson.voice_channel();
|
100 |
let mut transcribe_rx = lesson.transcript_channel();
|
101 |
+
let whisper = WhisperHandler::new(SETTINGS.whisper.clone(), prompt)
|
102 |
+
.expect("failed to create whisper");
|
103 |
let mut whisper_transcribe_rx = whisper.subscribe();
|
104 |
loop {
|
105 |
select! {
|
src/whisper.rs
CHANGED
@@ -8,6 +8,7 @@ use std::{
|
|
8 |
|
9 |
use once_cell::sync::Lazy;
|
10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
|
|
11 |
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperState, WhisperToken};
|
12 |
use whisper_rs_sys::WHISPER_SAMPLE_RATE;
|
13 |
|
@@ -124,15 +125,13 @@ impl WhisperHandler {
|
|
124 |
}
|
125 |
};
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
);
|
135 |
-
}
|
136 |
}
|
137 |
|
138 |
if let Err(e) = shared_transcription_tx.send(segments) {
|
|
|
8 |
|
9 |
use once_cell::sync::Lazy;
|
10 |
use tokio::sync::{broadcast, mpsc, oneshot};
|
11 |
+
use tracing::trace;
|
12 |
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperState, WhisperToken};
|
13 |
use whisper_rs_sys::WHISPER_SAMPLE_RATE;
|
14 |
|
|
|
125 |
}
|
126 |
};
|
127 |
|
128 |
+
for segment in segments.iter() {
|
129 |
+
trace!(
|
130 |
+
"[{}-{}]s SEGMENT: {}",
|
131 |
+
segment.start_timestamp as f32 / 1000.0,
|
132 |
+
segment.end_timestamp as f32 / 1000.0,
|
133 |
+
segment.text
|
134 |
+
);
|
|
|
|
|
135 |
}
|
136 |
|
137 |
if let Err(e) = shared_transcription_tx.send(segments) {
|