Spaces:
Build error
Build error
clean warning, clean code
Browse files- src/asr/aws.rs +8 -8
- src/asr/whisper.rs +5 -5
- src/lesson.rs +13 -12
- src/main.rs +119 -92
- static/index.html +1 -1
- whisper/src/config.rs +0 -1
- whisper/src/handler.rs +1 -13
src/asr/aws.rs
CHANGED
|
@@ -2,7 +2,7 @@ use std::error::Error;
|
|
| 2 |
use std::fmt::{Debug, Display, Formatter};
|
| 3 |
use async_stream::stream;
|
| 4 |
use async_trait::async_trait;
|
| 5 |
-
use aws_config::
|
| 6 |
use aws_sdk_transcribestreaming::operation::start_stream_transcription::StartStreamTranscriptionOutput;
|
| 7 |
use aws_sdk_transcribestreaming::primitives::Blob;
|
| 8 |
use aws_sdk_transcribestreaming::types::{
|
|
@@ -14,21 +14,21 @@ use tokio_stream::Stream;
|
|
| 14 |
use futures_util::TryStreamExt;
|
| 15 |
use crate::asr::{ASR, Event};
|
| 16 |
|
| 17 |
-
pub struct
|
| 18 |
speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
|
| 19 |
speaker_transcript: tokio::sync::broadcast::Sender<Event>,
|
| 20 |
drop_handler: Option<tokio::sync::oneshot::Sender<()>>,
|
| 21 |
}
|
| 22 |
|
| 23 |
-
impl Debug for
|
| 24 |
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
| 25 |
write!(f, "AWS_ASR")
|
| 26 |
}
|
| 27 |
}
|
| 28 |
|
| 29 |
-
impl
|
| 30 |
pub async fn from_env(lang: LanguageCode) -> anyhow::Result<Self> {
|
| 31 |
-
let config = aws_config::
|
| 32 |
let transcript_client = aws_sdk_transcribestreaming::Client::new(&config);
|
| 33 |
|
| 34 |
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
|
|
@@ -87,12 +87,12 @@ fn slice_i16_to_u8(slice: &[i16]) -> Vec<u8> {
|
|
| 87 |
slice
|
| 88 |
.iter()
|
| 89 |
.flat_map(|&sample| {
|
| 90 |
-
[
|
| 91 |
})
|
| 92 |
.collect()
|
| 93 |
}
|
| 94 |
|
| 95 |
-
impl Drop for
|
| 96 |
fn drop(&mut self) {
|
| 97 |
if let Some(drop_handler) = self.drop_handler.take() {
|
| 98 |
let _ = drop_handler.send(());
|
|
@@ -102,7 +102,7 @@ impl Drop for AWS_ASR {
|
|
| 102 |
|
| 103 |
|
| 104 |
#[async_trait]
|
| 105 |
-
impl ASR for
|
| 106 |
async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()> {
|
| 107 |
Ok(self.speaker_voice_channel.send(frame).await?)
|
| 108 |
}
|
|
|
|
| 2 |
use std::fmt::{Debug, Display, Formatter};
|
| 3 |
use async_stream::stream;
|
| 4 |
use async_trait::async_trait;
|
| 5 |
+
use aws_config::BehaviorVersion;
|
| 6 |
use aws_sdk_transcribestreaming::operation::start_stream_transcription::StartStreamTranscriptionOutput;
|
| 7 |
use aws_sdk_transcribestreaming::primitives::Blob;
|
| 8 |
use aws_sdk_transcribestreaming::types::{
|
|
|
|
| 14 |
use futures_util::TryStreamExt;
|
| 15 |
use crate::asr::{ASR, Event};
|
| 16 |
|
| 17 |
+
pub struct AwsAsr {
|
| 18 |
speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
|
| 19 |
speaker_transcript: tokio::sync::broadcast::Sender<Event>,
|
| 20 |
drop_handler: Option<tokio::sync::oneshot::Sender<()>>,
|
| 21 |
}
|
| 22 |
|
| 23 |
+
impl Debug for AwsAsr {
|
| 24 |
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
| 25 |
write!(f, "AWS_ASR")
|
| 26 |
}
|
| 27 |
}
|
| 28 |
|
| 29 |
+
impl AwsAsr {
|
| 30 |
pub async fn from_env(lang: LanguageCode) -> anyhow::Result<Self> {
|
| 31 |
+
let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
|
| 32 |
let transcript_client = aws_sdk_transcribestreaming::Client::new(&config);
|
| 33 |
|
| 34 |
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
|
|
|
|
| 87 |
slice
|
| 88 |
.iter()
|
| 89 |
.flat_map(|&sample| {
|
| 90 |
+
[sample as u8, (sample >> 8) as u8]
|
| 91 |
})
|
| 92 |
.collect()
|
| 93 |
}
|
| 94 |
|
| 95 |
+
impl Drop for AwsAsr {
|
| 96 |
fn drop(&mut self) {
|
| 97 |
if let Some(drop_handler) = self.drop_handler.take() {
|
| 98 |
let _ = drop_handler.send(());
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
#[async_trait]
|
| 105 |
+
impl ASR for AwsAsr {
|
| 106 |
async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()> {
|
| 107 |
Ok(self.speaker_voice_channel.send(frame).await?)
|
| 108 |
}
|
src/asr/whisper.rs
CHANGED
|
@@ -16,19 +16,19 @@ lazy_static! {
|
|
| 16 |
.expect("Failed to initialize whisper context");
|
| 17 |
}
|
| 18 |
|
| 19 |
-
pub struct
|
| 20 |
whisper: WhisperHandler,
|
| 21 |
tx: tokio::sync::broadcast::Sender<Event>,
|
| 22 |
}
|
| 23 |
|
| 24 |
-
impl Debug for
|
| 25 |
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
| 26 |
write!(f, "Whisper_ASR")
|
| 27 |
}
|
| 28 |
}
|
| 29 |
|
| 30 |
-
impl
|
| 31 |
-
pub async fn from_config() -> Result<
|
| 32 |
let whisper = CONTEXT.create_handler(&SETTINGS.whisper, "".to_string())?;
|
| 33 |
let mut output_rx = whisper.subscribe();
|
| 34 |
let (tx, _) = tokio::sync::broadcast::channel(64);
|
|
@@ -71,7 +71,7 @@ impl Whisper_ASR {
|
|
| 71 |
}
|
| 72 |
|
| 73 |
#[async_trait]
|
| 74 |
-
impl ASR for
|
| 75 |
async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()> {
|
| 76 |
Ok(self.whisper.send_i16(frame).await?)
|
| 77 |
}
|
|
|
|
| 16 |
.expect("Failed to initialize whisper context");
|
| 17 |
}
|
| 18 |
|
| 19 |
+
pub struct WhisperAsr {
|
| 20 |
whisper: WhisperHandler,
|
| 21 |
tx: tokio::sync::broadcast::Sender<Event>,
|
| 22 |
}
|
| 23 |
|
| 24 |
+
impl Debug for WhisperAsr {
|
| 25 |
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
| 26 |
write!(f, "Whisper_ASR")
|
| 27 |
}
|
| 28 |
}
|
| 29 |
|
| 30 |
+
impl WhisperAsr {
|
| 31 |
+
pub async fn from_config() -> Result<WhisperAsr, Error> {
|
| 32 |
let whisper = CONTEXT.create_handler(&SETTINGS.whisper, "".to_string())?;
|
| 33 |
let mut output_rx = whisper.subscribe();
|
| 34 |
let (tx, _) = tokio::sync::broadcast::channel(64);
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
#[async_trait]
|
| 74 |
+
impl ASR for WhisperAsr {
|
| 75 |
async fn frame(&mut self, frame: Vec<i16>) -> anyhow::Result<()> {
|
| 76 |
Ok(self.whisper.send_i16(frame).await?)
|
| 77 |
}
|
src/lesson.rs
CHANGED
|
@@ -5,18 +5,18 @@ use aws_sdk_transcribestreaming::types::{LanguageCode};
|
|
| 5 |
use futures_util::future::try_join;
|
| 6 |
use serde::{Deserialize, Serialize};
|
| 7 |
use std::collections::BTreeMap;
|
| 8 |
-
use std::fmt::{Debug,
|
| 9 |
use std::io::BufRead;
|
| 10 |
use std::ops::Deref;
|
| 11 |
use std::sync::{Arc, Weak};
|
| 12 |
use tokio::sync::RwLock;
|
| 13 |
-
use tracing::{error
|
| 14 |
|
| 15 |
use tokio::select;
|
| 16 |
-
use crate::asr::{Event, aws::
|
| 17 |
|
| 18 |
#[cfg(feature = "whisper")]
|
| 19 |
-
use crate::asr::whisper::
|
| 20 |
|
| 21 |
pub struct InnerLessonsManager {
|
| 22 |
translate_client: aws_sdk_translate::Client,
|
|
@@ -43,23 +43,24 @@ impl Deref for LessonsManager {
|
|
| 43 |
}
|
| 44 |
}
|
| 45 |
|
| 46 |
-
pub(crate) enum
|
| 47 |
AWS,
|
|
|
|
| 48 |
#[cfg(feature = "whisper")]
|
| 49 |
Whisper,
|
| 50 |
}
|
| 51 |
|
| 52 |
-
impl
|
| 53 |
async fn create(self, lang: LanguageCode) -> Box<dyn ASR + Send> {
|
| 54 |
match self {
|
| 55 |
-
|
| 56 |
-
|
| 57 |
.await
|
| 58 |
.expect("Failed to initialize AWS ASR")
|
| 59 |
),
|
| 60 |
#[cfg(feature = "whisper")]
|
| 61 |
-
|
| 62 |
-
|
| 63 |
.await
|
| 64 |
.expect("Failed to initialize Whisper ASR")
|
| 65 |
),
|
|
@@ -79,7 +80,7 @@ impl LessonsManager {
|
|
| 79 |
LessonsManager { inner: Arc::new(inner) }
|
| 80 |
}
|
| 81 |
|
| 82 |
-
pub(crate) async fn create_lesson(&self, id: u32, engine:
|
| 83 |
let mut map = self.lessons.write().await;
|
| 84 |
let lesson: Lesson = InnerLesson::new(self.clone(), engine, speaker_lang).await.into();
|
| 85 |
map.insert(id, lesson.clone());
|
|
@@ -153,7 +154,7 @@ pub(crate) struct InnerLesson {
|
|
| 153 |
}
|
| 154 |
|
| 155 |
impl InnerLesson {
|
| 156 |
-
async fn new(parent: LessonsManager, engine:
|
| 157 |
let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
|
| 158 |
let shared_speaker_transcript = speaker_transcript.clone();
|
| 159 |
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
|
|
|
|
| 5 |
use futures_util::future::try_join;
|
| 6 |
use serde::{Deserialize, Serialize};
|
| 7 |
use std::collections::BTreeMap;
|
| 8 |
+
use std::fmt::{Debug, Formatter};
|
| 9 |
use std::io::BufRead;
|
| 10 |
use std::ops::Deref;
|
| 11 |
use std::sync::{Arc, Weak};
|
| 12 |
use tokio::sync::RwLock;
|
| 13 |
+
use tracing::{error};
|
| 14 |
|
| 15 |
use tokio::select;
|
| 16 |
+
use crate::asr::{Event, aws::AwsAsr, ASR};
|
| 17 |
|
| 18 |
#[cfg(feature = "whisper")]
|
| 19 |
+
use crate::asr::whisper::WhisperAsr;
|
| 20 |
|
| 21 |
pub struct InnerLessonsManager {
|
| 22 |
translate_client: aws_sdk_translate::Client,
|
|
|
|
| 43 |
}
|
| 44 |
}
|
| 45 |
|
| 46 |
+
pub(crate) enum AsrEngine {
|
| 47 |
AWS,
|
| 48 |
+
#[allow(dead_code)]
|
| 49 |
#[cfg(feature = "whisper")]
|
| 50 |
Whisper,
|
| 51 |
}
|
| 52 |
|
| 53 |
+
impl AsrEngine {
|
| 54 |
async fn create(self, lang: LanguageCode) -> Box<dyn ASR + Send> {
|
| 55 |
match self {
|
| 56 |
+
AsrEngine::AWS => Box::new(
|
| 57 |
+
AwsAsr::from_env(lang)
|
| 58 |
.await
|
| 59 |
.expect("Failed to initialize AWS ASR")
|
| 60 |
),
|
| 61 |
#[cfg(feature = "whisper")]
|
| 62 |
+
AsrEngine::Whisper => Box::new(
|
| 63 |
+
WhisperAsr::from_config()
|
| 64 |
.await
|
| 65 |
.expect("Failed to initialize Whisper ASR")
|
| 66 |
),
|
|
|
|
| 80 |
LessonsManager { inner: Arc::new(inner) }
|
| 81 |
}
|
| 82 |
|
| 83 |
+
pub(crate) async fn create_lesson(&self, id: u32, engine: AsrEngine, speaker_lang: LanguageCode) -> Lesson {
|
| 84 |
let mut map = self.lessons.write().await;
|
| 85 |
let lesson: Lesson = InnerLesson::new(self.clone(), engine, speaker_lang).await.into();
|
| 86 |
map.insert(id, lesson.clone());
|
|
|
|
| 154 |
}
|
| 155 |
|
| 156 |
impl InnerLesson {
|
| 157 |
+
async fn new(parent: LessonsManager, engine: AsrEngine, speaker_lang: LanguageCode) -> InnerLesson {
|
| 158 |
let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
|
| 159 |
let shared_speaker_transcript = speaker_transcript.clone();
|
| 160 |
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
|
src/main.rs
CHANGED
|
@@ -7,7 +7,10 @@
|
|
| 7 |
|
| 8 |
#[cfg(feature = "whisper")]
|
| 9 |
extern crate whisper;
|
|
|
|
|
|
|
| 10 |
use aws_sdk_transcribestreaming::meta::PKG_VERSION;
|
|
|
|
| 11 |
use futures_util::{stream::StreamExt, SinkExt};
|
| 12 |
use poem::{
|
| 13 |
endpoint::{StaticFileEndpoint, StaticFilesEndpoint},
|
|
@@ -44,7 +47,7 @@ async fn main() -> Result<(), std::io::Error> {
|
|
| 44 |
|
| 45 |
debug!("Transcribe client version: {}", PKG_VERSION);
|
| 46 |
|
| 47 |
-
let shared_config = aws_config::
|
| 48 |
let ctx = Context {
|
| 49 |
lessons_manager: LessonsManager::new(&shared_config),
|
| 50 |
};
|
|
@@ -57,7 +60,9 @@ async fn main() -> Result<(), std::io::Error> {
|
|
| 57 |
.index_file("index.html"),
|
| 58 |
)
|
| 59 |
.at("/ws/lesson-speaker", get(stream_speaker))
|
|
|
|
| 60 |
.at("/ws/lesson-listener", get(stream_listener))
|
|
|
|
| 61 |
.at(
|
| 62 |
"lesson-speaker",
|
| 63 |
StaticFileEndpoint::new("./static/index.html"),
|
|
@@ -83,7 +88,8 @@ async fn main() -> Result<(), std::io::Error> {
|
|
| 83 |
#[derive(Deserialize, Debug)]
|
| 84 |
pub struct LessonSpeakerQuery {
|
| 85 |
id: u32,
|
| 86 |
-
|
|
|
|
| 87 |
prompt: Option<String>,
|
| 88 |
}
|
| 89 |
|
|
@@ -93,51 +99,58 @@ async fn stream_speaker(
|
|
| 93 |
query: Query<LessonSpeakerQuery>,
|
| 94 |
ws: WebSocket,
|
| 95 |
) -> impl IntoResponse {
|
| 96 |
-
let
|
| 97 |
-
.lessons_manager
|
| 98 |
-
.create_lesson(
|
| 99 |
-
query.id,
|
| 100 |
-
ASR_Engine::AWS,
|
| 101 |
-
query.lang.clone().parse().expect("Not supported lang"),
|
| 102 |
-
)
|
| 103 |
-
.await;
|
| 104 |
-
let prompt = query.prompt.clone().unwrap_or_default();
|
| 105 |
-
|
| 106 |
ws.on_upgrade(|mut socket| async move {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
Some(
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
tracing::warn!("failed to send voice: {}", e);
|
| 116 |
-
break;
|
| 117 |
-
}
|
| 118 |
-
},
|
| 119 |
-
Some(Ok(_)) => {
|
| 120 |
tracing::warn!("Other: {:?}", msg);
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
}
|
| 129 |
-
break;
|
| 130 |
-
}
|
| 131 |
-
}
|
| 132 |
-
},
|
| 133 |
-
output = transcribe_rx.recv() => {
|
| 134 |
-
if let Ok(evt) = output {
|
| 135 |
tracing::trace!("Transcribed: {}", evt.transcript);
|
| 136 |
-
let evt = LiveLessonTextEvent::Transcription {
|
| 137 |
-
let json = serde_json::to_string(&evt)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
}
|
| 142 |
}
|
| 143 |
})
|
|
@@ -146,26 +159,29 @@ async fn stream_speaker(
|
|
| 146 |
#[derive(Deserialize, Debug)]
|
| 147 |
pub struct LessonListenerQuery {
|
| 148 |
id: u32,
|
| 149 |
-
|
| 150 |
voice: String,
|
| 151 |
}
|
| 152 |
|
| 153 |
-
#[derive(Serialize)]
|
| 154 |
#[serde(tag = "type")]
|
| 155 |
enum LiveLessonTextEvent {
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
LipSync { visemes: Vec<Viseme> },
|
| 159 |
}
|
| 160 |
-
|
| 161 |
#[handler]
|
| 162 |
async fn stream_listener(
|
| 163 |
ctx: Data<&Context>,
|
| 164 |
query: Query<LessonListenerQuery>,
|
| 165 |
ws: WebSocket,
|
| 166 |
) -> impl IntoResponse {
|
| 167 |
-
let
|
| 168 |
-
debug!("listener param = {:?}", query);
|
| 169 |
|
| 170 |
ws.on_upgrade(|mut socket| async move {
|
| 171 |
let voice_id = match query.voice.parse() {
|
|
@@ -177,6 +193,9 @@ async fn stream_listener(
|
|
| 177 |
return;
|
| 178 |
}
|
| 179 |
};
|
|
|
|
|
|
|
|
|
|
| 180 |
let Some(lesson) = lesson_opt else {
|
| 181 |
let _ = socket
|
| 182 |
.send(Message::Text("lesson not found".to_string()))
|
|
@@ -184,54 +203,62 @@ async fn stream_listener(
|
|
| 184 |
return;
|
| 185 |
};
|
| 186 |
let mut transcript_rx = lesson.transcript_channel();
|
| 187 |
-
let mut lang_lesson = lesson.get_or_init(query.
|
| 188 |
let mut translate_rx = lang_lesson.translated_channel();
|
| 189 |
let mut voice_lesson = lang_lesson.get_or_init(voice_id).await;
|
| 190 |
let mut voice_rx = voice_lesson.voice_channel();
|
| 191 |
let mut lip_sync_rx = voice_lesson.lip_sync_channel();
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
let
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
}
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
let evt = LiveLessonTextEvent::Translation {
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
let _ = socket.send(Message::Binary(voice)).await;
|
| 226 |
-
}
|
| 227 |
-
},
|
| 228 |
-
visemes = lip_sync_rx.recv() => {
|
| 229 |
-
if let Ok(visemes) = visemes {
|
| 230 |
let evt = LiveLessonTextEvent::LipSync { visemes };
|
| 231 |
-
let json = serde_json::to_string(&evt)
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
}
|
| 236 |
}
|
| 237 |
})
|
|
|
|
| 7 |
|
| 8 |
#[cfg(feature = "whisper")]
|
| 9 |
extern crate whisper;
|
| 10 |
+
|
| 11 |
+
use aws_config::BehaviorVersion;
|
| 12 |
use aws_sdk_transcribestreaming::meta::PKG_VERSION;
|
| 13 |
+
use aws_sdk_transcribestreaming::types::LanguageCode;
|
| 14 |
use futures_util::{stream::StreamExt, SinkExt};
|
| 15 |
use poem::{
|
| 16 |
endpoint::{StaticFileEndpoint, StaticFilesEndpoint},
|
|
|
|
| 47 |
|
| 48 |
debug!("Transcribe client version: {}", PKG_VERSION);
|
| 49 |
|
| 50 |
+
let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await;
|
| 51 |
let ctx = Context {
|
| 52 |
lessons_manager: LessonsManager::new(&shared_config),
|
| 53 |
};
|
|
|
|
| 60 |
.index_file("index.html"),
|
| 61 |
)
|
| 62 |
.at("/ws/lesson-speaker", get(stream_speaker))
|
| 63 |
+
.at("/ws/teacher", get(stream_speaker))
|
| 64 |
.at("/ws/lesson-listener", get(stream_listener))
|
| 65 |
+
.at("/ws/student", get(stream_listener))
|
| 66 |
.at(
|
| 67 |
"lesson-speaker",
|
| 68 |
StaticFileEndpoint::new("./static/index.html"),
|
|
|
|
| 88 |
#[derive(Deserialize, Debug)]
|
| 89 |
pub struct LessonSpeakerQuery {
|
| 90 |
id: u32,
|
| 91 |
+
language: String,
|
| 92 |
+
#[allow(dead_code)] // TODO: use this in the future
|
| 93 |
prompt: Option<String>,
|
| 94 |
}
|
| 95 |
|
|
|
|
| 99 |
query: Query<LessonSpeakerQuery>,
|
| 100 |
ws: WebSocket,
|
| 101 |
) -> impl IntoResponse {
|
| 102 |
+
let lessons_manager = ctx.lessons_manager.clone();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
ws.on_upgrade(|mut socket| async move {
|
| 104 |
+
let Ok(lang) = query.language.parse::<LanguageCode>() else {
|
| 105 |
+
let _ = socket
|
| 106 |
+
.send(Message::Text(format!("invalid language code: {}", query.language)))
|
| 107 |
+
.await;
|
| 108 |
+
return
|
| 109 |
+
};
|
| 110 |
+
let lesson = lessons_manager
|
| 111 |
+
.create_lesson(
|
| 112 |
+
query.id,
|
| 113 |
+
AsrEngine::AWS,
|
| 114 |
+
lang,
|
| 115 |
+
)
|
| 116 |
+
.await;
|
| 117 |
+
|
| 118 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 119 |
+
let fut = async {
|
| 120 |
+
loop {
|
| 121 |
+
select! {
|
| 122 |
+
msg = socket.next() => {
|
| 123 |
+
let Some(res) = msg else { break };
|
| 124 |
+
let msg = res?;
|
| 125 |
+
let Message::Binary(bin) = msg else {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
tracing::warn!("Other: {:?}", msg);
|
| 127 |
+
continue
|
| 128 |
+
};
|
| 129 |
+
let frame = u8_to_i16(&bin);
|
| 130 |
+
lesson.send(frame).await?
|
| 131 |
+
},
|
| 132 |
+
output = transcribe_rx.recv() => {
|
| 133 |
+
let evt = output?;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
tracing::trace!("Transcribed: {}", evt.transcript);
|
| 135 |
+
let evt = LiveLessonTextEvent::Transcription { content: evt.transcript, is_final: evt.is_final };
|
| 136 |
+
let Ok(json) = serde_json::to_string(&evt) else {
|
| 137 |
+
tracing::warn!("failed to serialize json: {:?}", evt);
|
| 138 |
+
continue
|
| 139 |
+
};
|
| 140 |
+
socket.send(Message::Text(json)).await?
|
| 141 |
+
},
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
Ok(())
|
| 145 |
+
};
|
| 146 |
+
|
| 147 |
+
let res: anyhow::Result<()> = fut.await;
|
| 148 |
+
match res {
|
| 149 |
+
Ok(()) => {
|
| 150 |
+
tracing::info!("lesson speaker closed");
|
| 151 |
+
}
|
| 152 |
+
Err(e) => {
|
| 153 |
+
tracing::warn!("lesson speaker error: {}", e);
|
| 154 |
}
|
| 155 |
}
|
| 156 |
})
|
|
|
|
| 159 |
#[derive(Deserialize, Debug)]
|
| 160 |
pub struct LessonListenerQuery {
|
| 161 |
id: u32,
|
| 162 |
+
language: String,
|
| 163 |
voice: String,
|
| 164 |
}
|
| 165 |
|
| 166 |
+
#[derive(Serialize, Debug)]
|
| 167 |
#[serde(tag = "type")]
|
| 168 |
enum LiveLessonTextEvent {
|
| 169 |
+
#[serde(rename = "original")]
|
| 170 |
+
Transcription {
|
| 171 |
+
content: String,
|
| 172 |
+
#[serde(rename = "isFinal")]
|
| 173 |
+
is_final: bool
|
| 174 |
+
},
|
| 175 |
+
Translation { content: String },
|
| 176 |
LipSync { visemes: Vec<Viseme> },
|
| 177 |
}
|
|
|
|
| 178 |
#[handler]
|
| 179 |
async fn stream_listener(
|
| 180 |
ctx: Data<&Context>,
|
| 181 |
query: Query<LessonListenerQuery>,
|
| 182 |
ws: WebSocket,
|
| 183 |
) -> impl IntoResponse {
|
| 184 |
+
let lessons_manager = ctx.lessons_manager.clone();
|
|
|
|
| 185 |
|
| 186 |
ws.on_upgrade(|mut socket| async move {
|
| 187 |
let voice_id = match query.voice.parse() {
|
|
|
|
| 193 |
return;
|
| 194 |
}
|
| 195 |
};
|
| 196 |
+
|
| 197 |
+
let lesson_opt = lessons_manager.get_lesson(query.id).await;
|
| 198 |
+
debug!("listener param = {:?}", query);
|
| 199 |
let Some(lesson) = lesson_opt else {
|
| 200 |
let _ = socket
|
| 201 |
.send(Message::Text("lesson not found".to_string()))
|
|
|
|
| 203 |
return;
|
| 204 |
};
|
| 205 |
let mut transcript_rx = lesson.transcript_channel();
|
| 206 |
+
let mut lang_lesson = lesson.get_or_init(query.language.clone()).await;
|
| 207 |
let mut translate_rx = lang_lesson.translated_channel();
|
| 208 |
let mut voice_lesson = lang_lesson.get_or_init(voice_id).await;
|
| 209 |
let mut voice_rx = voice_lesson.voice_channel();
|
| 210 |
let mut lip_sync_rx = voice_lesson.lip_sync_channel();
|
| 211 |
|
| 212 |
+
let fut = async {
|
| 213 |
+
loop {
|
| 214 |
+
select! {
|
| 215 |
+
transcript_poll = transcript_rx.recv() => {
|
| 216 |
+
let transcript = transcript_poll?;
|
| 217 |
+
let evt = LiveLessonTextEvent::Transcription {
|
| 218 |
+
content: transcript.transcript,
|
| 219 |
+
is_final: transcript.is_final
|
| 220 |
+
};
|
| 221 |
+
let Ok(json) = serde_json::to_string(&evt) else {
|
| 222 |
+
tracing::warn!("failed to serialize: {:?}", evt);
|
| 223 |
+
continue
|
| 224 |
+
};
|
| 225 |
+
tracing::debug!("Transcribed: {}", json);
|
| 226 |
+
socket.send(Message::Text(json)).await?
|
| 227 |
+
},
|
| 228 |
+
translated_poll = translate_rx.recv() => {
|
| 229 |
+
let translated = translated_poll?;
|
| 230 |
+
let evt = LiveLessonTextEvent::Translation { content: translated };
|
| 231 |
+
let Ok(json) = serde_json::to_string(&evt) else {
|
| 232 |
+
tracing::warn!("failed to serialize: {:?}", evt);
|
| 233 |
+
continue
|
| 234 |
+
};
|
| 235 |
+
tracing::debug!("Translated: {}", json);
|
| 236 |
+
socket.send(Message::Text(json)).await?
|
| 237 |
+
},
|
| 238 |
+
voice_poll = voice_rx.recv() => {
|
| 239 |
+
let voice = voice_poll?;
|
| 240 |
+
socket.send(Message::Binary(voice)).await?
|
| 241 |
+
},
|
| 242 |
+
visemes_poll = lip_sync_rx.recv() => {
|
| 243 |
+
let visemes = visemes_poll?;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
let evt = LiveLessonTextEvent::LipSync { visemes };
|
| 245 |
+
let Ok(json) = serde_json::to_string(&evt) else {
|
| 246 |
+
tracing::warn!("failed to serialize: {:?}", evt);
|
| 247 |
+
continue
|
| 248 |
+
};
|
| 249 |
+
socket.send(Message::Text(json)).await?
|
| 250 |
+
},
|
| 251 |
+
}
|
| 252 |
+
}
|
| 253 |
+
};
|
| 254 |
+
|
| 255 |
+
let res: anyhow::Result<()> = fut.await;
|
| 256 |
+
match res {
|
| 257 |
+
Ok(()) => {
|
| 258 |
+
tracing::info!("lesson listener closed");
|
| 259 |
+
}
|
| 260 |
+
Err(e) => {
|
| 261 |
+
tracing::warn!("lesson listener error: {}", e);
|
| 262 |
}
|
| 263 |
}
|
| 264 |
})
|
static/index.html
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 7 |
<title>Vite + React</title>
|
| 8 |
-
<script type="module" crossorigin src="/assets/index-
|
| 9 |
<link rel="stylesheet" href="/assets/index-983f9492.css">
|
| 10 |
</head>
|
| 11 |
<body>
|
|
|
|
| 5 |
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 7 |
<title>Vite + React</title>
|
| 8 |
+
<script type="module" crossorigin src="/assets/index-b40ca4ea.js"></script>
|
| 9 |
<link rel="stylesheet" href="/assets/index-983f9492.css">
|
| 10 |
</head>
|
| 11 |
<body>
|
whisper/src/config.rs
CHANGED
|
@@ -9,7 +9,6 @@ pub struct WhisperConfig {
|
|
| 9 |
pub(crate) keep_ms: usize,
|
| 10 |
pub model: String,
|
| 11 |
pub(crate) max_prompt_tokens: usize,
|
| 12 |
-
pub(crate) context_confidence_threshold: f32,
|
| 13 |
}
|
| 14 |
|
| 15 |
#[allow(dead_code)]
|
|
|
|
| 9 |
pub(crate) keep_ms: usize,
|
| 10 |
pub model: String,
|
| 11 |
pub(crate) max_prompt_tokens: usize,
|
|
|
|
| 12 |
}
|
| 13 |
|
| 14 |
#[allow(dead_code)]
|
whisper/src/handler.rs
CHANGED
|
@@ -6,7 +6,7 @@ use std::{
|
|
| 6 |
};
|
| 7 |
use fvad::SampleRate;
|
| 8 |
|
| 9 |
-
use tokio::sync::{broadcast, mpsc, oneshot
|
| 10 |
use tokio::time::Instant;
|
| 11 |
use tracing::{warn};
|
| 12 |
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperError, WhisperState, WhisperToken, WhisperTokenData};
|
|
@@ -30,18 +30,6 @@ impl <'a> Context {
|
|
| 30 |
}
|
| 31 |
}
|
| 32 |
|
| 33 |
-
static WHISPER_CONTEXT: OnceCell<WhisperContext> = OnceCell::const_new();
|
| 34 |
-
|
| 35 |
-
async fn initialize_whisper_context(model: String) -> WhisperContext {
|
| 36 |
-
tokio::task::spawn_blocking(move || {
|
| 37 |
-
WhisperContext::new(&model).expect("failed to create WhisperContext")
|
| 38 |
-
}).await.expect("failed to spawn")
|
| 39 |
-
}
|
| 40 |
-
|
| 41 |
-
async fn get_whisper_context(model: String) -> &'static WhisperContext {
|
| 42 |
-
WHISPER_CONTEXT.get_or_init(|| initialize_whisper_context(model)).await
|
| 43 |
-
}
|
| 44 |
-
|
| 45 |
#[derive(Debug)]
|
| 46 |
pub enum Error {
|
| 47 |
WhisperError {
|
|
|
|
| 6 |
};
|
| 7 |
use fvad::SampleRate;
|
| 8 |
|
| 9 |
+
use tokio::sync::{broadcast, mpsc, oneshot};
|
| 10 |
use tokio::time::Instant;
|
| 11 |
use tracing::{warn};
|
| 12 |
use whisper_rs::{convert_integer_to_float_audio, WhisperContext, WhisperError, WhisperState, WhisperToken, WhisperTokenData};
|
|
|
|
| 30 |
}
|
| 31 |
}
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
#[derive(Debug)]
|
| 34 |
pub enum Error {
|
| 35 |
WhisperError {
|