Spaces:
Build error
Build error
split whisper into a standalone crate
Browse files- src/asr/aws.rs +167 -4
- src/lesson.rs +11 -115
- src/main.rs +21 -25
- whisper/src/handler.rs +0 -15
src/asr/aws.rs
CHANGED
|
@@ -1,17 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
use async_trait::async_trait;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
use tokio::sync::broadcast::Receiver;
|
|
|
|
|
|
|
| 3 |
use crate::asr::{ASR, Event};
|
| 4 |
|
| 5 |
-
struct AWS_ASR {
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
#[async_trait]
|
| 9 |
impl ASR for AWS_ASR {
|
| 10 |
async fn frame(&mut self, frame: &[i16]) -> anyhow::Result<()> {
|
| 11 |
-
|
| 12 |
}
|
| 13 |
|
| 14 |
fn subscribe(&mut self) -> Receiver<Event> {
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
}
|
| 17 |
}
|
|
|
|
| 1 |
+
use std::error::Error;
|
| 2 |
+
use std::fmt::{Display, Formatter};
|
| 3 |
+
use async_stream::stream;
|
| 4 |
use async_trait::async_trait;
|
| 5 |
+
use aws_sdk_transcribestreaming::operation::start_stream_transcription::StartStreamTranscriptionOutput;
|
| 6 |
+
use aws_sdk_transcribestreaming::primitives::Blob;
|
| 7 |
+
use aws_sdk_transcribestreaming::types::{
|
| 8 |
+
AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream,
|
| 9 |
+
};
|
| 10 |
+
use tokio::select;
|
| 11 |
use tokio::sync::broadcast::Receiver;
|
| 12 |
+
use tokio_stream::Stream;
|
| 13 |
+
use futures_util::TryStreamExt;
|
| 14 |
use crate::asr::{ASR, Event};
|
| 15 |
|
| 16 |
+
pub struct AWS_ASR {
|
| 17 |
+
client: aws_sdk_transcribestreaming::Client,
|
| 18 |
+
speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
|
| 19 |
+
speaker_transcript: tokio::sync::broadcast::Sender<Event>,
|
| 20 |
+
drop_handler: Option<tokio::sync::oneshot::Sender<()>>,
|
| 21 |
}
|
| 22 |
+
|
| 23 |
+
impl AWS_ASR {
|
| 24 |
+
pub async fn from_env(lang: LanguageCode) -> anyhow::Result<Self> {
|
| 25 |
+
let config = aws_config::load_from_env().await;
|
| 26 |
+
let transcript_client = aws_sdk_transcribestreaming::Client::new(&config);
|
| 27 |
+
let client = transcript_client.clone();
|
| 28 |
+
|
| 29 |
+
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
|
| 30 |
+
let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
|
| 31 |
+
let shared_speaker_transcript = speaker_transcript.clone();
|
| 32 |
+
|
| 33 |
+
let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<()>();
|
| 34 |
+
|
| 35 |
+
tokio::spawn(async move {
|
| 36 |
+
let fut = async {
|
| 37 |
+
let input_stream = stream! {
|
| 38 |
+
while let Some(raw) = speaker_voice_rx.recv().await {
|
| 39 |
+
let reshape = slice_i16_to_u8(&raw);
|
| 40 |
+
yield Ok(AudioStream::AudioEvent(AudioEvent::builder().audio_chunk(Blob::new(reshape)).build()));
|
| 41 |
+
}
|
| 42 |
+
};
|
| 43 |
+
let output = transcript_client
|
| 44 |
+
.start_stream_transcription()
|
| 45 |
+
.language_code(lang) //LanguageCode::EnGb
|
| 46 |
+
.media_sample_rate_hertz(16000)
|
| 47 |
+
.media_encoding(MediaEncoding::Pcm)
|
| 48 |
+
.audio_stream(input_stream.into())
|
| 49 |
+
.send()
|
| 50 |
+
.await
|
| 51 |
+
.map_err(|e| StreamTranscriptionError::EstablishStreamError(Box::new(e)))?;
|
| 52 |
+
|
| 53 |
+
let output_stream = to_stream(output);
|
| 54 |
+
output_stream
|
| 55 |
+
.try_for_each(|text| async {
|
| 56 |
+
let _ = shared_speaker_transcript.send(text);
|
| 57 |
+
Ok(())
|
| 58 |
+
})
|
| 59 |
+
.await?;
|
| 60 |
+
Ok(()) as anyhow::Result<()>
|
| 61 |
+
};
|
| 62 |
+
select! {
|
| 63 |
+
res = fut => {
|
| 64 |
+
if let Err(e) = res {
|
| 65 |
+
println!("Error: {:?}", e);
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
_ = drop_rx => {}
|
| 69 |
+
}
|
| 70 |
+
});
|
| 71 |
+
|
| 72 |
+
Ok(Self {
|
| 73 |
+
client,
|
| 74 |
+
speaker_voice_channel,
|
| 75 |
+
speaker_transcript,
|
| 76 |
+
drop_handler: Some(drop_handler)
|
| 77 |
+
})
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
#[allow(dead_code)]
|
| 82 |
+
fn slice_i16_to_u8(slice: &[i16]) -> Vec<u8> {
|
| 83 |
+
slice
|
| 84 |
+
.iter()
|
| 85 |
+
.flat_map(|&sample| {
|
| 86 |
+
[(sample >> 8) as u8, sample as u8]
|
| 87 |
+
})
|
| 88 |
+
.collect()
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
impl Drop for AWS_ASR {
|
| 92 |
+
fn drop(&mut self) {
|
| 93 |
+
if let Some(drop_handler) = self.drop_handler.take() {
|
| 94 |
+
let _ = drop_handler.send(());
|
| 95 |
+
}
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
#[async_trait]
|
| 101 |
impl ASR for AWS_ASR {
|
| 102 |
async fn frame(&mut self, frame: &[i16]) -> anyhow::Result<()> {
|
| 103 |
+
Ok(self.speaker_voice_channel.send(frame.to_vec()).await?)
|
| 104 |
}
|
| 105 |
|
| 106 |
fn subscribe(&mut self) -> Receiver<Event> {
|
| 107 |
+
self.speaker_transcript.subscribe()
|
| 108 |
+
}
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
#[allow(dead_code)]
|
| 112 |
+
fn to_stream(
|
| 113 |
+
mut output: StartStreamTranscriptionOutput,
|
| 114 |
+
) -> impl Stream<Item = Result<Event, StreamTranscriptionError>> {
|
| 115 |
+
stream! {
|
| 116 |
+
while let Some(event) = output
|
| 117 |
+
.transcript_result_stream
|
| 118 |
+
.recv()
|
| 119 |
+
.await
|
| 120 |
+
.map_err(|e| StreamTranscriptionError::TranscriptResultStreamError(Box::new(e)))? {
|
| 121 |
+
match event {
|
| 122 |
+
TranscriptResultStream::TranscriptEvent(transcript_event) => {
|
| 123 |
+
let Some(transcript) = transcript_event.transcript else {
|
| 124 |
+
continue
|
| 125 |
+
};
|
| 126 |
+
|
| 127 |
+
for result in transcript.results.unwrap_or_default() {
|
| 128 |
+
let Some(alternatives) = result.alternatives else {
|
| 129 |
+
continue
|
| 130 |
+
};
|
| 131 |
+
let Some(first_alternative) = alternatives.first() else {
|
| 132 |
+
continue
|
| 133 |
+
};
|
| 134 |
+
let Some(text) = &first_alternative.transcript else {
|
| 135 |
+
continue
|
| 136 |
+
};
|
| 137 |
+
let evt = Event {
|
| 138 |
+
transcript: text.clone(),
|
| 139 |
+
is_final: !result.is_partial,
|
| 140 |
+
};
|
| 141 |
+
yield Ok(evt);
|
| 142 |
+
}
|
| 143 |
+
}
|
| 144 |
+
_ => yield Err(StreamTranscriptionError::Unknown),
|
| 145 |
+
}
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
#[derive(Debug)]
|
| 152 |
+
enum StreamTranscriptionError {
|
| 153 |
+
EstablishStreamError(Box<dyn Error + Send + Sync>),
|
| 154 |
+
TranscriptResultStreamError(Box<dyn Error + Send + Sync>),
|
| 155 |
+
Unknown,
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
impl Display for StreamTranscriptionError {
|
| 159 |
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
| 160 |
+
match self {
|
| 161 |
+
StreamTranscriptionError::EstablishStreamError(e) => {
|
| 162 |
+
write!(f, "EstablishStreamError: {}", e)
|
| 163 |
+
}
|
| 164 |
+
StreamTranscriptionError::TranscriptResultStreamError(e) => {
|
| 165 |
+
write!(f, "TranscriptResultStreamError: {}", e)
|
| 166 |
+
}
|
| 167 |
+
StreamTranscriptionError::Unknown => write!(f, "Unknown"),
|
| 168 |
+
}
|
| 169 |
+
}
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
impl Error for StreamTranscriptionError {
|
| 173 |
+
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
| 174 |
+
match self {
|
| 175 |
+
StreamTranscriptionError::EstablishStreamError(e) => Some(e.as_ref()),
|
| 176 |
+
StreamTranscriptionError::TranscriptResultStreamError(e) => Some(e.as_ref()),
|
| 177 |
+
StreamTranscriptionError::Unknown => None,
|
| 178 |
+
}
|
| 179 |
}
|
| 180 |
}
|
src/lesson.rs
CHANGED
|
@@ -1,41 +1,32 @@
|
|
| 1 |
-
use async_stream::stream;
|
| 2 |
use aws_config::SdkConfig;
|
| 3 |
use aws_sdk_polly::primitives::ByteStream;
|
| 4 |
use aws_sdk_polly::types::{Engine, OutputFormat, SpeechMarkType, VoiceId};
|
| 5 |
-
use aws_sdk_transcribestreaming::
|
| 6 |
-
use aws_sdk_transcribestreaming::primitives::Blob;
|
| 7 |
-
use aws_sdk_transcribestreaming::types::{
|
| 8 |
-
AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream,
|
| 9 |
-
};
|
| 10 |
use futures_util::future::try_join;
|
| 11 |
-
use futures_util::{Stream, TryStreamExt};
|
| 12 |
use serde::{Deserialize, Serialize};
|
| 13 |
use std::collections::BTreeMap;
|
| 14 |
-
use std::
|
| 15 |
-
use std::fmt::{Display, Formatter};
|
| 16 |
use std::io::BufRead;
|
| 17 |
use std::sync::{Arc, Weak};
|
| 18 |
use tokio::sync::RwLock;
|
| 19 |
|
| 20 |
use tokio::select;
|
|
|
|
| 21 |
|
| 22 |
#[derive(Clone, Debug)]
|
| 23 |
pub struct LessonsManager {
|
| 24 |
translate_client: aws_sdk_translate::Client,
|
| 25 |
polly_client: aws_sdk_polly::Client,
|
| 26 |
-
transcript_client: aws_sdk_transcribestreaming::Client,
|
| 27 |
lessons: Arc<RwLock<BTreeMap<u32, Lesson>>>,
|
| 28 |
}
|
| 29 |
|
| 30 |
impl LessonsManager {
|
| 31 |
pub(crate) fn new(sdk_config: &SdkConfig) -> Self {
|
| 32 |
-
let transcript_client = aws_sdk_transcribestreaming::Client::new(sdk_config);
|
| 33 |
let translate_client = aws_sdk_translate::Client::new(sdk_config);
|
| 34 |
let polly_client = aws_sdk_polly::Client::new(sdk_config);
|
| 35 |
LessonsManager {
|
| 36 |
translate_client,
|
| 37 |
polly_client,
|
| 38 |
-
transcript_client,
|
| 39 |
lessons: Arc::new(RwLock::new(BTreeMap::new())),
|
| 40 |
}
|
| 41 |
}
|
|
@@ -78,11 +69,11 @@ impl Lesson {
|
|
| 78 |
}
|
| 79 |
}
|
| 80 |
|
| 81 |
-
pub(crate) fn
|
| 82 |
-
self.inner.speaker_voice_channel.
|
| 83 |
}
|
| 84 |
|
| 85 |
-
pub(crate) fn transcript_channel(&self) -> tokio::sync::broadcast::Receiver<
|
| 86 |
self.inner.speaker_transcript.subscribe()
|
| 87 |
}
|
| 88 |
}
|
|
@@ -99,56 +90,17 @@ impl From<InnerLesson> for Lesson {
|
|
| 99 |
struct InnerLesson {
|
| 100 |
parent: LessonsManager,
|
| 101 |
speaker_lang: LanguageCode,
|
| 102 |
-
speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<
|
| 103 |
-
speaker_transcript: tokio::sync::broadcast::Sender<
|
| 104 |
lang_lessons: RwLock<BTreeMap<String, Weak<InnerLangLesson>>>,
|
| 105 |
drop_handler: Option<tokio::sync::oneshot::Sender<Signal>>,
|
| 106 |
}
|
| 107 |
|
| 108 |
impl InnerLesson {
|
| 109 |
fn new(parent: LessonsManager, speaker_lang: LanguageCode) -> InnerLesson {
|
| 110 |
-
let (speaker_transcript, _) = tokio::sync::broadcast::channel::<
|
| 111 |
-
let shared_speaker_transcript = speaker_transcript.clone();
|
| 112 |
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel(128);
|
| 113 |
let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<Signal>();
|
| 114 |
-
let transcript_client = parent.transcript_client.clone();
|
| 115 |
-
let shared_speak_lang = speaker_lang.clone();
|
| 116 |
-
|
| 117 |
-
tokio::spawn(async move {
|
| 118 |
-
let fut = async {
|
| 119 |
-
let input_stream = stream! {
|
| 120 |
-
while let Some(raw) = speaker_voice_rx.recv().await {
|
| 121 |
-
yield Ok(AudioStream::AudioEvent(AudioEvent::builder().audio_chunk(Blob::new(raw)).build()));
|
| 122 |
-
}
|
| 123 |
-
};
|
| 124 |
-
let output = transcript_client
|
| 125 |
-
.start_stream_transcription()
|
| 126 |
-
.language_code(shared_speak_lang) //LanguageCode::EnGb
|
| 127 |
-
.media_sample_rate_hertz(16000)
|
| 128 |
-
.media_encoding(MediaEncoding::Pcm)
|
| 129 |
-
.audio_stream(input_stream.into())
|
| 130 |
-
.send()
|
| 131 |
-
.await
|
| 132 |
-
.map_err(|e| StreamTranscriptionError::EstablishStreamError(Box::new(e)))?;
|
| 133 |
-
|
| 134 |
-
let output_stream = to_stream(output);
|
| 135 |
-
output_stream
|
| 136 |
-
.try_for_each(|text| async {
|
| 137 |
-
let _ = shared_speaker_transcript.send(text);
|
| 138 |
-
Ok(())
|
| 139 |
-
})
|
| 140 |
-
.await?;
|
| 141 |
-
Ok(()) as Result<(), StreamTranscriptionError>
|
| 142 |
-
};
|
| 143 |
-
select! {
|
| 144 |
-
res = fut => {
|
| 145 |
-
if let Err(e) = res {
|
| 146 |
-
println!("Error: {:?}", e);
|
| 147 |
-
}
|
| 148 |
-
}
|
| 149 |
-
_ = drop_rx => {}
|
| 150 |
-
}
|
| 151 |
-
});
|
| 152 |
|
| 153 |
InnerLesson {
|
| 154 |
parent,
|
|
@@ -220,10 +172,10 @@ impl LangLesson {
|
|
| 220 |
let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<Signal>();
|
| 221 |
tokio::spawn(async move {
|
| 222 |
let fut = async {
|
| 223 |
-
while let Ok(
|
| 224 |
let output = translate_client
|
| 225 |
.translate_text()
|
| 226 |
-
.text(
|
| 227 |
.source_language_code(shared_speaker_lang.as_str())
|
| 228 |
.target_language_code(shared_lang.clone())
|
| 229 |
.send()
|
|
@@ -375,31 +327,6 @@ impl Drop for InnerVoiceLesson {
|
|
| 375 |
}
|
| 376 |
}
|
| 377 |
|
| 378 |
-
fn to_stream(
|
| 379 |
-
mut output: StartStreamTranscriptionOutput,
|
| 380 |
-
) -> impl Stream<Item = Result<String, StreamTranscriptionError>> {
|
| 381 |
-
stream! {
|
| 382 |
-
while let Some(event) = output
|
| 383 |
-
.transcript_result_stream
|
| 384 |
-
.recv()
|
| 385 |
-
.await
|
| 386 |
-
.map_err(|e| StreamTranscriptionError::TranscriptResultStreamError(Box::new(e)))? {
|
| 387 |
-
match event {
|
| 388 |
-
TranscriptResultStream::TranscriptEvent(transcript_event) => {
|
| 389 |
-
let transcript = transcript_event.transcript.expect("transcript");
|
| 390 |
-
for result in transcript.results.unwrap_or_default() {
|
| 391 |
-
if !result.is_partial {
|
| 392 |
-
let first_alternative = &result.alternatives.as_ref().expect("should have")[0];
|
| 393 |
-
let slice = first_alternative.transcript.as_ref().expect("should have");
|
| 394 |
-
yield Ok(slice.clone());
|
| 395 |
-
}
|
| 396 |
-
}
|
| 397 |
-
}
|
| 398 |
-
_ => yield Err(StreamTranscriptionError::Unknown),
|
| 399 |
-
}
|
| 400 |
-
}
|
| 401 |
-
}
|
| 402 |
-
}
|
| 403 |
|
| 404 |
// {"time":180,"type":"viseme","value":"r"}
|
| 405 |
#[derive(Debug, Deserialize, Clone, Serialize)]
|
|
@@ -449,34 +376,3 @@ async fn synthesize_speech(
|
|
| 449 |
.collect();
|
| 450 |
Ok((parsed, audio.audio_stream))
|
| 451 |
}
|
| 452 |
-
|
| 453 |
-
#[derive(Debug)]
|
| 454 |
-
enum StreamTranscriptionError {
|
| 455 |
-
EstablishStreamError(Box<dyn Error + Send + Sync>),
|
| 456 |
-
TranscriptResultStreamError(Box<dyn Error + Send + Sync>),
|
| 457 |
-
Unknown,
|
| 458 |
-
}
|
| 459 |
-
|
| 460 |
-
impl Display for StreamTranscriptionError {
|
| 461 |
-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
| 462 |
-
match self {
|
| 463 |
-
StreamTranscriptionError::EstablishStreamError(e) => {
|
| 464 |
-
write!(f, "EstablishStreamError: {}", e)
|
| 465 |
-
}
|
| 466 |
-
StreamTranscriptionError::TranscriptResultStreamError(e) => {
|
| 467 |
-
write!(f, "TranscriptResultStreamError: {}", e)
|
| 468 |
-
}
|
| 469 |
-
StreamTranscriptionError::Unknown => write!(f, "Unknown"),
|
| 470 |
-
}
|
| 471 |
-
}
|
| 472 |
-
}
|
| 473 |
-
|
| 474 |
-
impl Error for StreamTranscriptionError {
|
| 475 |
-
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
| 476 |
-
match self {
|
| 477 |
-
StreamTranscriptionError::EstablishStreamError(e) => Some(e.as_ref()),
|
| 478 |
-
StreamTranscriptionError::TranscriptResultStreamError(e) => Some(e.as_ref()),
|
| 479 |
-
StreamTranscriptionError::Unknown => None,
|
| 480 |
-
}
|
| 481 |
-
}
|
| 482 |
-
}
|
|
|
|
|
|
|
| 1 |
use aws_config::SdkConfig;
|
| 2 |
use aws_sdk_polly::primitives::ByteStream;
|
| 3 |
use aws_sdk_polly::types::{Engine, OutputFormat, SpeechMarkType, VoiceId};
|
| 4 |
+
use aws_sdk_transcribestreaming::types::{LanguageCode};
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
use futures_util::future::try_join;
|
|
|
|
| 6 |
use serde::{Deserialize, Serialize};
|
| 7 |
use std::collections::BTreeMap;
|
| 8 |
+
use std::fmt::{Display};
|
|
|
|
| 9 |
use std::io::BufRead;
|
| 10 |
use std::sync::{Arc, Weak};
|
| 11 |
use tokio::sync::RwLock;
|
| 12 |
|
| 13 |
use tokio::select;
|
| 14 |
+
use crate::asr::Event;
|
| 15 |
|
| 16 |
#[derive(Clone, Debug)]
|
| 17 |
pub struct LessonsManager {
|
| 18 |
translate_client: aws_sdk_translate::Client,
|
| 19 |
polly_client: aws_sdk_polly::Client,
|
|
|
|
| 20 |
lessons: Arc<RwLock<BTreeMap<u32, Lesson>>>,
|
| 21 |
}
|
| 22 |
|
| 23 |
impl LessonsManager {
|
| 24 |
pub(crate) fn new(sdk_config: &SdkConfig) -> Self {
|
|
|
|
| 25 |
let translate_client = aws_sdk_translate::Client::new(sdk_config);
|
| 26 |
let polly_client = aws_sdk_polly::Client::new(sdk_config);
|
| 27 |
LessonsManager {
|
| 28 |
translate_client,
|
| 29 |
polly_client,
|
|
|
|
| 30 |
lessons: Arc::new(RwLock::new(BTreeMap::new())),
|
| 31 |
}
|
| 32 |
}
|
|
|
|
| 69 |
}
|
| 70 |
}
|
| 71 |
|
| 72 |
+
pub(crate) async fn send(&self, frame: Vec<i16>) -> anyhow::Result<()> {
|
| 73 |
+
Ok(self.inner.speaker_voice_channel.send(frame).await?)
|
| 74 |
}
|
| 75 |
|
| 76 |
+
pub(crate) fn transcript_channel(&self) -> tokio::sync::broadcast::Receiver<Event> {
|
| 77 |
self.inner.speaker_transcript.subscribe()
|
| 78 |
}
|
| 79 |
}
|
|
|
|
| 90 |
struct InnerLesson {
|
| 91 |
parent: LessonsManager,
|
| 92 |
speaker_lang: LanguageCode,
|
| 93 |
+
speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
|
| 94 |
+
speaker_transcript: tokio::sync::broadcast::Sender<Event>,
|
| 95 |
lang_lessons: RwLock<BTreeMap<String, Weak<InnerLangLesson>>>,
|
| 96 |
drop_handler: Option<tokio::sync::oneshot::Sender<Signal>>,
|
| 97 |
}
|
| 98 |
|
| 99 |
impl InnerLesson {
|
| 100 |
fn new(parent: LessonsManager, speaker_lang: LanguageCode) -> InnerLesson {
|
| 101 |
+
let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
|
|
|
|
| 102 |
let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel(128);
|
| 103 |
let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<Signal>();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
InnerLesson {
|
| 106 |
parent,
|
|
|
|
| 172 |
let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<Signal>();
|
| 173 |
tokio::spawn(async move {
|
| 174 |
let fut = async {
|
| 175 |
+
while let Ok(evt) = transcript_rx.recv().await {
|
| 176 |
let output = translate_client
|
| 177 |
.translate_text()
|
| 178 |
+
.text(evt.transcript)
|
| 179 |
.source_language_code(shared_speaker_lang.as_str())
|
| 180 |
.target_language_code(shared_lang.clone())
|
| 181 |
.send()
|
|
|
|
| 327 |
}
|
| 328 |
}
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
// {"time":180,"type":"viseme","value":"r"}
|
| 332 |
#[derive(Debug, Deserialize, Clone, Serialize)]
|
|
|
|
| 376 |
.collect();
|
| 377 |
Ok((parsed, audio.audio_stream))
|
| 378 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/main.rs
CHANGED
|
@@ -25,8 +25,6 @@ use tracing::debug;
|
|
| 25 |
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
| 26 |
|
| 27 |
use crate::{config::*, lesson::*};
|
| 28 |
-
#[cfg(feature = "whisper")]
|
| 29 |
-
use crate::whisper::*;
|
| 30 |
|
| 31 |
mod config;
|
| 32 |
mod lesson;
|
|
@@ -105,30 +103,17 @@ async fn stream_speaker(
|
|
| 105 |
let prompt = query.prompt.clone().unwrap_or_default();
|
| 106 |
|
| 107 |
ws.on_upgrade(|mut socket| async move {
|
| 108 |
-
let _origin_tx = lesson.voice_channel();
|
| 109 |
let mut transcribe_rx = lesson.transcript_channel();
|
| 110 |
-
#[cfg(feature = "whisper")]
|
| 111 |
-
let mut whisper = asr::whisper::whisper_asr::CONTEXT.create_handler(&SETTINGS.whisper, prompt)
|
| 112 |
-
.expect("failed to create whisper");
|
| 113 |
-
#[cfg(feature = "whisper")]
|
| 114 |
-
let mut whisper_transcribe_rx = whisper.subscribe();
|
| 115 |
loop {
|
| 116 |
select! {
|
| 117 |
-
// w = whisper_transcribe_rx.recv() => {
|
| 118 |
-
// let Ok(_txt) = w else {
|
| 119 |
-
// // TODO: handle msg
|
| 120 |
-
// continue
|
| 121 |
-
// };
|
| 122 |
-
// }
|
| 123 |
msg = socket.next() => {
|
| 124 |
match msg.as_ref() {
|
| 125 |
Some(Ok(Message::Binary(bin))) => {
|
| 126 |
-
|
| 127 |
-
let
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
// }
|
| 132 |
},
|
| 133 |
Some(Ok(_)) => {
|
| 134 |
tracing::warn!("Other: {:?}", msg);
|
|
@@ -145,9 +130,9 @@ async fn stream_speaker(
|
|
| 145 |
}
|
| 146 |
},
|
| 147 |
output = transcribe_rx.recv() => {
|
| 148 |
-
if let Ok(
|
| 149 |
-
tracing::trace!("Transcribed: {}", transcript);
|
| 150 |
-
let evt = LiveLessonTextEvent::Transcription { text: transcript
|
| 151 |
let json = serde_json::to_string(&evt).expect("failed to serialize");
|
| 152 |
let _ = socket.send(Message::Text(json)).await.expect("failed to send");
|
| 153 |
}
|
|
@@ -207,8 +192,8 @@ async fn stream_listener(
|
|
| 207 |
loop {
|
| 208 |
select! {
|
| 209 |
transcript = transcript_rx.recv() => {
|
| 210 |
-
if let Ok(
|
| 211 |
-
let evt = LiveLessonTextEvent::Transcription { text: transcript };
|
| 212 |
match serde_json::to_string(&evt) {
|
| 213 |
Ok(json) => {
|
| 214 |
tracing::debug!("Transcribed: {}", json);
|
|
@@ -250,3 +235,14 @@ async fn stream_listener(
|
|
| 250 |
}
|
| 251 |
})
|
| 252 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
|
| 26 |
|
| 27 |
use crate::{config::*, lesson::*};
|
|
|
|
|
|
|
| 28 |
|
| 29 |
mod config;
|
| 30 |
mod lesson;
|
|
|
|
| 103 |
let prompt = query.prompt.clone().unwrap_or_default();
|
| 104 |
|
| 105 |
ws.on_upgrade(|mut socket| async move {
|
|
|
|
| 106 |
let mut transcribe_rx = lesson.transcript_channel();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
loop {
|
| 108 |
select! {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
msg = socket.next() => {
|
| 110 |
match msg.as_ref() {
|
| 111 |
Some(Ok(Message::Binary(bin))) => {
|
| 112 |
+
let frame = u8_to_i16(bin);
|
| 113 |
+
if let Err(e) = lesson.send(frame).await {
|
| 114 |
+
tracing::warn!("failed to send voice: {}", e);
|
| 115 |
+
break;
|
| 116 |
+
}
|
|
|
|
| 117 |
},
|
| 118 |
Some(Ok(_)) => {
|
| 119 |
tracing::warn!("Other: {:?}", msg);
|
|
|
|
| 130 |
}
|
| 131 |
},
|
| 132 |
output = transcribe_rx.recv() => {
|
| 133 |
+
if let Ok(evt) = output {
|
| 134 |
+
tracing::trace!("Transcribed: {}", evt.transcript);
|
| 135 |
+
let evt = LiveLessonTextEvent::Transcription { text: evt.transcript };
|
| 136 |
let json = serde_json::to_string(&evt).expect("failed to serialize");
|
| 137 |
let _ = socket.send(Message::Text(json)).await.expect("failed to send");
|
| 138 |
}
|
|
|
|
| 192 |
loop {
|
| 193 |
select! {
|
| 194 |
transcript = transcript_rx.recv() => {
|
| 195 |
+
if let Ok(evt) = transcript {
|
| 196 |
+
let evt = LiveLessonTextEvent::Transcription { text: evt.transcript };
|
| 197 |
match serde_json::to_string(&evt) {
|
| 198 |
Ok(json) => {
|
| 199 |
tracing::debug!("Transcribed: {}", json);
|
|
|
|
| 235 |
}
|
| 236 |
})
|
| 237 |
}
|
| 238 |
+
|
| 239 |
+
fn u8_to_i16(input: &[u8]) -> Vec<i16> {
|
| 240 |
+
input
|
| 241 |
+
.chunks_exact(2)
|
| 242 |
+
.map(|chunk| {
|
| 243 |
+
let mut buf = [0u8; 2];
|
| 244 |
+
buf.copy_from_slice(chunk);
|
| 245 |
+
i16::from_le_bytes(buf)
|
| 246 |
+
})
|
| 247 |
+
.collect::<Vec<i16>>()
|
| 248 |
+
}
|
whisper/src/handler.rs
CHANGED
|
@@ -77,16 +77,6 @@ impl std::error::Error for Error {
|
|
| 77 |
}
|
| 78 |
}
|
| 79 |
|
| 80 |
-
fn u8_to_i16(input: &[u8]) -> Vec<i16> {
|
| 81 |
-
input
|
| 82 |
-
.chunks_exact(2)
|
| 83 |
-
.map(|chunk| {
|
| 84 |
-
let mut buf = [0u8; 2];
|
| 85 |
-
buf.copy_from_slice(chunk);
|
| 86 |
-
i16::from_le_bytes(buf)
|
| 87 |
-
})
|
| 88 |
-
.collect::<Vec<i16>>()
|
| 89 |
-
}
|
| 90 |
|
| 91 |
#[derive(Clone, Debug)]
|
| 92 |
pub enum Output {
|
|
@@ -206,11 +196,6 @@ impl WhisperHandler {
|
|
| 206 |
pub async fn send_i16(&mut self, data: Vec<i16>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
| 207 |
self.tx.send(data).await
|
| 208 |
}
|
| 209 |
-
|
| 210 |
-
pub async fn send_bytes(&mut self, data: Vec<u8>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
| 211 |
-
let i16_data = u8_to_i16(&data);
|
| 212 |
-
self.send_i16(i16_data).await
|
| 213 |
-
}
|
| 214 |
}
|
| 215 |
|
| 216 |
#[allow(dead_code)]
|
|
|
|
| 77 |
}
|
| 78 |
}
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
#[derive(Clone, Debug)]
|
| 82 |
pub enum Output {
|
|
|
|
| 196 |
pub async fn send_i16(&mut self, data: Vec<i16>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
|
| 197 |
self.tx.send(data).await
|
| 198 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
}
|
| 200 |
|
| 201 |
#[allow(dead_code)]
|