clean warning, clean code
Files changed:
- src/asr/aws.rs +8 -8
- src/asr/whisper.rs +5 -5
- src/lesson.rs +13 -12
- src/main.rs +119 -92
- static/index.html +1 -1
- whisper/src/config.rs +0 -1
- whisper/src/handler.rs +1 -13
src/asr/aws.rs
CHANGED
@@ -2,7 +2,7 @@ use std::error::Error;
 use std::fmt::{Debug, Display, Formatter};
 use async_stream::stream;
 use async_trait::async_trait;
-use aws_config::…
+use aws_config::BehaviorVersion;
 use aws_sdk_transcribestreaming::operation::start_stream_transcription::StartStreamTranscriptionOutput;
 use aws_sdk_transcribestreaming::primitives::Blob;
 use aws_sdk_transcribestreaming::types::{
@@ -14,21 +14,21 @@ use tokio_stream::Stream;
 use futures_util::TryStreamExt;
 use crate::asr::{ASR, Event};
 
-pub struct AWS_ASR {
+pub struct AwsAsr {
     speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
     speaker_transcript: tokio::sync::broadcast::Sender<Event>,
     drop_handler: Option<tokio::sync::oneshot::Sender<()>>,
 }
 
-impl Debug for AWS_ASR {
+impl Debug for AwsAsr {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(f, "AWS_ASR")
     }
 }
 
-impl AWS_ASR {
+impl AwsAsr {
     pub async fn from_env(lang: LanguageCode) -> anyhow::Result<Self> {
-        let config = aws_config::…
+        let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
         let transcript_client = aws_sdk_transcribestreaming::Client::new(&config);
 
         let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
@@ -87,12 +87,12 @@ fn slice_i16_to_u8(slice: &[i16]) -> Vec<u8> {
     slice
         .iter()
         .flat_map(|&sample| {
-            […
+            [sample as u8, (sample >> 8) as u8]
         })
         .collect()
 }
 
-impl Drop for AWS_ASR {
+impl Drop for AwsAsr {
     fn drop(&mut self) {
         if let Some(drop_handler) = self.drop_handler.take() {
             let _ = drop_handler.send(());
@@ -102,7 +102,7 @@ impl Drop for AWS_ASR {
 
 
 #[async_trait]
-impl ASR for AWS_ASR {
+impl ASR for AwsAsr {
     async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()> {
         Ok(self.speaker_voice_channel.send(frame).await?)
     }
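Note on the `slice_i16_to_u8` hunk above: the new closure body emits each 16-bit sample as two bytes, low byte first, i.e. little-endian PCM, which is the byte order the Transcribe streaming blob is built from here. A standalone sketch (not part of this commit, plain std only) showing that the manual shift matches `i16::to_le_bytes`:

```rust
// Standalone sketch: the manual byte split used in the diff vs. i16::to_le_bytes.
fn slice_i16_to_u8(slice: &[i16]) -> Vec<u8> {
    slice
        .iter()
        .flat_map(|&sample| sample.to_le_bytes()) // [low byte, high byte]
        .collect()
}

fn main() {
    let samples: &[i16] = &[0, 1, -1, i16::MAX, i16::MIN];
    // Same split as the closure in the diff: low byte, then high byte.
    let manual: Vec<u8> = samples
        .iter()
        .flat_map(|&s| [s as u8, (s >> 8) as u8])
        .collect();
    assert_eq!(manual, slice_i16_to_u8(samples));
    println!("{:?}", manual);
}
```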
src/asr/whisper.rs
CHANGED
@@ -16,19 +16,19 @@ lazy_static! {
         .expect("Failed to initialize whisper context");
 }
 
-pub struct Whisper_ASR {
+pub struct WhisperAsr {
     whisper: WhisperHandler,
     tx: tokio::sync::broadcast::Sender<Event>,
 }
 
-impl Debug for Whisper_ASR {
+impl Debug for WhisperAsr {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(f, "Whisper_ASR")
     }
 }
 
-impl Whisper_ASR {
-    pub async fn from_config() -> Result<Whisper_ASR, Error> {
+impl WhisperAsr {
+    pub async fn from_config() -> Result<WhisperAsr, Error> {
         let whisper = CONTEXT.create_handler(&SETTINGS.whisper, "".to_string())?;
         let mut output_rx = whisper.subscribe();
         let (tx, _) = tokio::sync::broadcast::channel(64);
@@ -71,7 +71,7 @@ impl Whisper_ASR {
 }
 
 #[async_trait]
-impl ASR for Whisper_ASR {
+impl ASR for WhisperAsr {
     async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()> {
         Ok(self.whisper.send_i16(frame).await?)
     }
src/lesson.rs
CHANGED
@@ -5,18 +5,18 @@ use aws_sdk_transcribestreaming::types::{LanguageCode};
 use futures_util::future::try_join;
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
-use std::fmt::{Debug, …
+use std::fmt::{Debug, Formatter};
 use std::io::BufRead;
 use std::ops::Deref;
 use std::sync::{Arc, Weak};
 use tokio::sync::RwLock;
-use tracing::{error…
+use tracing::{error};
 
 use tokio::select;
-use crate::asr::{Event, aws::AWS_ASR, ASR};
+use crate::asr::{Event, aws::AwsAsr, ASR};
 
 #[cfg(feature = "whisper")]
-use crate::asr::whisper::Whisper_ASR;
+use crate::asr::whisper::WhisperAsr;
 
 pub struct InnerLessonsManager {
     translate_client: aws_sdk_translate::Client,
@@ -43,23 +43,24 @@ impl Deref for LessonsManager {
     }
 }
 
-pub(crate) enum ASR_Engine {
+pub(crate) enum AsrEngine {
     AWS,
+    #[allow(dead_code)]
     #[cfg(feature = "whisper")]
     Whisper,
 }
 
-impl ASR_Engine {
+impl AsrEngine {
     async fn create(self, lang: LanguageCode) -> Box<dyn ASR + Send> {
         match self {
-            ASR_Engine::AWS => Box::new(
-                AWS_ASR::from_env(lang)
+            AsrEngine::AWS => Box::new(
+                AwsAsr::from_env(lang)
                     .await
                     .expect("Failed to initialize AWS ASR")
             ),
             #[cfg(feature = "whisper")]
-            ASR_Engine::Whisper => Box::new(
-                Whisper_ASR::from_config()
+            AsrEngine::Whisper => Box::new(
+                WhisperAsr::from_config()
                     .await
                     .expect("Failed to initialize Whisper ASR")
             ),
@@ -79,7 +80,7 @@ impl LessonsManager {
         LessonsManager { inner: Arc::new(inner) }
     }
 
-    pub(crate) async fn create_lesson(&self, id: u32, engine: ASR_Engine, speaker_lang: LanguageCode) -> Lesson {
+    pub(crate) async fn create_lesson(&self, id: u32, engine: AsrEngine, speaker_lang: LanguageCode) -> Lesson {
         let mut map = self.lessons.write().await;
         let lesson: Lesson = InnerLesson::new(self.clone(), engine, speaker_lang).await.into();
         map.insert(id, lesson.clone());
@@ -153,7 +154,7 @@ pub(crate) struct InnerLesson {
 }
 
 impl InnerLesson {
-    async fn new(parent: LessonsManager, engine: ASR_Engine, speaker_lang: LanguageCode) -> InnerLesson {
+    async fn new(parent: LessonsManager, engine: AsrEngine, speaker_lang: LanguageCode) -> InnerLesson {
         let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
         let shared_speaker_transcript = speaker_transcript.clone();
         let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
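The renamed `AsrEngine` above keeps the existing pattern: an enum picks a backend at lesson-creation time and hands it back as a boxed trait object, so the rest of `lesson.rs` only deals with `Box<dyn ASR + Send>`. A simplified, self-contained sketch of that pattern (stub types, not the project's real `ASR` trait or constructors; assumes the `tokio`, `async-trait`, and `anyhow` crates):

```rust
use async_trait::async_trait;

// Stub trait standing in for the project's ASR trait.
#[async_trait]
trait Asr {
    async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()>;
}

struct AwsAsr;
struct WhisperAsr;

#[async_trait]
impl Asr for AwsAsr {
    async fn frame(&mut self, _frame: Vec<i16>) -> anyhow::Result<()> { Ok(()) }
}

#[async_trait]
impl Asr for WhisperAsr {
    async fn frame(&mut self, _frame: Vec<i16>) -> anyhow::Result<()> { Ok(()) }
}

enum AsrEngine {
    Aws,
    Whisper,
}

impl AsrEngine {
    // The branch on the engine happens once, here; callers only see the trait object.
    fn create(self) -> Box<dyn Asr + Send> {
        match self {
            AsrEngine::Aws => Box::new(AwsAsr),
            AsrEngine::Whisper => Box::new(WhisperAsr),
        }
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let mut asr = AsrEngine::Aws.create();
    asr.frame(vec![0i16; 160]).await
}
```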
src/main.rs
CHANGED
@@ -7,7 +7,10 @@
 
 #[cfg(feature = "whisper")]
 extern crate whisper;
+
+use aws_config::BehaviorVersion;
 use aws_sdk_transcribestreaming::meta::PKG_VERSION;
+use aws_sdk_transcribestreaming::types::LanguageCode;
 use futures_util::{stream::StreamExt, SinkExt};
 use poem::{
     endpoint::{StaticFileEndpoint, StaticFilesEndpoint},
@@ -44,7 +47,7 @@ async fn main() -> Result<(), std::io::Error> {
 
     debug!("Transcribe client version: {}", PKG_VERSION);
 
-    let shared_config = aws_config::…
+    let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await;
     let ctx = Context {
         lessons_manager: LessonsManager::new(&shared_config),
     };
@@ -57,7 +60,9 @@ async fn main() -> Result<(), std::io::Error> {
                 .index_file("index.html"),
         )
         .at("/ws/lesson-speaker", get(stream_speaker))
+        .at("/ws/teacher", get(stream_speaker))
         .at("/ws/lesson-listener", get(stream_listener))
+        .at("/ws/student", get(stream_listener))
         .at(
             "lesson-speaker",
             StaticFileEndpoint::new("./static/index.html"),
@@ -83,7 +88,8 @@ async fn main() -> Result<(), std::io::Error> {
 #[derive(Deserialize, Debug)]
 pub struct LessonSpeakerQuery {
     id: u32,
-    lang: String,
+    language: String,
+    #[allow(dead_code)] // TODO: use this in the future
     prompt: Option<String>,
 }
 
@@ -93,51 +99,58 @@ async fn stream_speaker(
     query: Query<LessonSpeakerQuery>,
     ws: WebSocket,
 ) -> impl IntoResponse {
-    let lesson = ctx
-        .lessons_manager
-        .create_lesson(
-            query.id,
-            ASR_Engine::AWS,
-            query.lang.clone().parse().expect("Not supported lang"),
-        )
-        .await;
-    let prompt = query.prompt.clone().unwrap_or_default();
-
+    let lessons_manager = ctx.lessons_manager.clone();
     ws.on_upgrade(|mut socket| async move {
+        let Ok(lang) = query.language.parse::<LanguageCode>() else {
+            let _ = socket
+                .send(Message::Text(format!("invalid language code: {}", query.language)))
+                .await;
+            return
+        };
+        let lesson = lessons_manager
+            .create_lesson(
+                query.id,
+                AsrEngine::AWS,
+                lang,
+            )
+            .await;
+
         let mut transcribe_rx = lesson.transcript_channel();
-        …
-                Some(…
-        …
-                        tracing::warn!("failed to send voice: {}", e);
-                        break;
-                    }
-                },
-                Some(Ok(_)) => {
+        let fut = async {
+            loop {
+                select! {
+                    msg = socket.next() => {
+                        let Some(res) = msg else { break };
+                        let msg = res?;
+                        let Message::Binary(bin) = msg else {
                             tracing::warn!("Other: {:?}", msg);
-        …
-                    }
-                    break;
-                }
-            }
-        },
-        output = transcribe_rx.recv() => {
-            if let Ok(evt) = output {
+                            continue
+                        };
+                        let frame = u8_to_i16(&bin);
+                        lesson.send(frame).await?
+                    },
+                    output = transcribe_rx.recv() => {
+                        let evt = output?;
                         tracing::trace!("Transcribed: {}", evt.transcript);
-                let evt = LiveLessonTextEvent::Transcription { …
-                let json = serde_json::to_string(&evt)…
-        …
+                        let evt = LiveLessonTextEvent::Transcription { content: evt.transcript, is_final: evt.is_final };
+                        let Ok(json) = serde_json::to_string(&evt) else {
+                            tracing::warn!("failed to serialize json: {:?}", evt);
+                            continue
+                        };
+                        socket.send(Message::Text(json)).await?
+                    },
+                }
+            }
+            Ok(())
+        };
+
+        let res: anyhow::Result<()> = fut.await;
+        match res {
+            Ok(()) => {
+                tracing::info!("lesson speaker closed");
+            }
+            Err(e) => {
+                tracing::warn!("lesson speaker error: {}", e);
             }
         }
     })
@@ -146,26 +159,29 @@ async fn stream_speaker(
 #[derive(Deserialize, Debug)]
 pub struct LessonListenerQuery {
     id: u32,
-    lang: String,
+    language: String,
     voice: String,
 }
 
-#[derive(Serialize)]
+#[derive(Serialize, Debug)]
 #[serde(tag = "type")]
 enum LiveLessonTextEvent {
-    …
+    #[serde(rename = "original")]
+    Transcription {
+        content: String,
+        #[serde(rename = "isFinal")]
+        is_final: bool
+    },
+    Translation { content: String },
     LipSync { visemes: Vec<Viseme> },
 }
-
 #[handler]
 async fn stream_listener(
     ctx: Data<&Context>,
     query: Query<LessonListenerQuery>,
     ws: WebSocket,
 ) -> impl IntoResponse {
-    let …
-    debug!("listener param = {:?}", query);
+    let lessons_manager = ctx.lessons_manager.clone();
 
     ws.on_upgrade(|mut socket| async move {
         let voice_id = match query.voice.parse() {
@@ -177,6 +193,9 @@ async fn stream_listener(
                 return;
             }
         };
+
+        let lesson_opt = lessons_manager.get_lesson(query.id).await;
+        debug!("listener param = {:?}", query);
         let Some(lesson) = lesson_opt else {
             let _ = socket
                 .send(Message::Text("lesson not found".to_string()))
@@ -184,54 +203,62 @@ async fn stream_listener(
             return;
         };
         let mut transcript_rx = lesson.transcript_channel();
-        let mut lang_lesson = lesson.get_or_init(query.…
+        let mut lang_lesson = lesson.get_or_init(query.language.clone()).await;
         let mut translate_rx = lang_lesson.translated_channel();
         let mut voice_lesson = lang_lesson.get_or_init(voice_id).await;
        let mut voice_rx = voice_lesson.voice_channel();
         let mut lip_sync_rx = voice_lesson.lip_sync_channel();
 
-        …
-        let …
-        …
-                    }
-        …
-                    let evt = LiveLessonTextEvent::Translation {…
-        …
-                        let _ = socket.send(Message::Binary(voice)).await;
-                    }
-                },
-                visemes = lip_sync_rx.recv() => {
-                    if let Ok(visemes) = visemes {
+        let fut = async {
+            loop {
+                select! {
+                    transcript_poll = transcript_rx.recv() => {
+                        let transcript = transcript_poll?;
+                        let evt = LiveLessonTextEvent::Transcription {
+                            content: transcript.transcript,
+                            is_final: transcript.is_final
+                        };
+                        let Ok(json) = serde_json::to_string(&evt) else {
+                            tracing::warn!("failed to serialize: {:?}", evt);
+                            continue
+                        };
+                        tracing::debug!("Transcribed: {}", json);
+                        socket.send(Message::Text(json)).await?
+                    },
+                    translated_poll = translate_rx.recv() => {
+                        let translated = translated_poll?;
+                        let evt = LiveLessonTextEvent::Translation { content: translated };
+                        let Ok(json) = serde_json::to_string(&evt) else {
+                            tracing::warn!("failed to serialize: {:?}", evt);
+                            continue
+                        };
+                        tracing::debug!("Translated: {}", json);
+                        socket.send(Message::Text(json)).await?
+                    },
+                    voice_poll = voice_rx.recv() => {
+                        let voice = voice_poll?;
+                        socket.send(Message::Binary(voice)).await?
+                    },
+                    visemes_poll = lip_sync_rx.recv() => {
+                        let visemes = visemes_poll?;
                         let evt = LiveLessonTextEvent::LipSync { visemes };
-                        let json = serde_json::to_string(&evt)…
-        …
+                        let Ok(json) = serde_json::to_string(&evt) else {
+                            tracing::warn!("failed to serialize: {:?}", evt);
+                            continue
+                        };
+                        socket.send(Message::Text(json)).await?
+                    },
+                }
+            }
+        };
+
+        let res: anyhow::Result<()> = fut.await;
+        match res {
+            Ok(()) => {
+                tracing::info!("lesson listener closed");
+            }
+            Err(e) => {
+                tracing::warn!("lesson listener error: {}", e);
             }
         }
     })
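The `LiveLessonTextEvent` changes above (deriving `Debug`, renaming the transcription variant to `original`, and adding the camelCase `isFinal` field) define the JSON shape the websocket clients receive. A small sketch of how serde serializes this internally tagged enum, assuming `serde` with the `derive` feature plus `serde_json`, and omitting the `LipSync` variant since `Viseme` lives elsewhere in the project:

```rust
use serde::Serialize;

#[derive(Serialize, Debug)]
#[serde(tag = "type")] // internally tagged: the variant name becomes a "type" field
enum LiveLessonTextEvent {
    #[serde(rename = "original")]
    Transcription {
        content: String,
        #[serde(rename = "isFinal")]
        is_final: bool,
    },
    Translation { content: String },
}

fn main() {
    let evt = LiveLessonTextEvent::Transcription {
        content: "hello".to_string(),
        is_final: true,
    };
    // Prints: {"type":"original","content":"hello","isFinal":true}
    println!("{}", serde_json::to_string(&evt).unwrap());

    let evt = LiveLessonTextEvent::Translation { content: "bonjour".to_string() };
    // Prints: {"type":"Translation","content":"bonjour"}
    println!("{}", serde_json::to_string(&evt).unwrap());
}
```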
static/index.html
CHANGED
@@ -5,7 +5,7 @@
     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>Vite + React</title>
-    <script type="module" crossorigin src="/assets/index-….js"></script>
+    <script type="module" crossorigin src="/assets/index-b40ca4ea.js"></script>
     <link rel="stylesheet" href="/assets/index-983f9492.css">
   </head>
   <body>
whisper/src/config.rs
CHANGED
@@ -9,7 +9,6 @@ pub struct WhisperConfig {
     pub(crate) keep_ms: usize,
     pub model: String,
     pub(crate) max_prompt_tokens: usize,
-    pub(crate) context_confidence_threshold: f32,
 }
 
 #[allow(dead_code)]
whisper/src/handler.rs
CHANGED
@@ -6,7 +6,7 @@ use std::{
 };
 use fvad::SampleRate;
 
-use tokio::sync::{broadcast, mpsc, oneshot…
+use tokio::sync::{broadcast, mpsc, oneshot};
 use tokio::time::Instant;
 use tracing::{warn};
 use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperError, WhisperState, WhisperToken, WhisperTokenData};
@@ -30,18 +30,6 @@ impl <'a> Context {
     }
 }
 
-static WHISPER_CONTEXT: OnceCell<WhisperContext> = OnceCell::const_new();
-
-async fn initialize_whisper_context(model: String) -> WhisperContext {
-    tokio::task::spawn_blocking(move || {
-        WhisperContext::new(&model).expect("failed to create WhisperContext")
-    }).await.expect("failed to spawn")
-}
-
-async fn get_whisper_context(model: String) -> &'static WhisperContext {
-    WHISPER_CONTEXT.get_or_init(|| initialize_whisper_context(model)).await
-}
-
 #[derive(Debug)]
 pub enum Error {
     WhisperError {