split whisper into a standalone crate
Files changed:
- src/asr/aws.rs          +167 -4
- src/lesson.rs           +11 -115
- src/main.rs             +21 -25
- whisper/src/handler.rs  +0 -15
src/asr/aws.rs  CHANGED
@@ -1,17 +1,180 @@
+use std::error::Error;
+use std::fmt::{Display, Formatter};
+use async_stream::stream;
 use async_trait::async_trait;
+use aws_sdk_transcribestreaming::operation::start_stream_transcription::StartStreamTranscriptionOutput;
+use aws_sdk_transcribestreaming::primitives::Blob;
+use aws_sdk_transcribestreaming::types::{
+    AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream,
+};
+use tokio::select;
 use tokio::sync::broadcast::Receiver;
+use tokio_stream::Stream;
+use futures_util::TryStreamExt;
 use crate::asr::{ASR, Event};
 
-struct AWS_ASR {
-
+pub struct AWS_ASR {
+    client: aws_sdk_transcribestreaming::Client,
+    speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
+    speaker_transcript: tokio::sync::broadcast::Sender<Event>,
+    drop_handler: Option<tokio::sync::oneshot::Sender<()>>,
 }
+
+impl AWS_ASR {
+    pub async fn from_env(lang: LanguageCode) -> anyhow::Result<Self> {
+        let config = aws_config::load_from_env().await;
+        let transcript_client = aws_sdk_transcribestreaming::Client::new(&config);
+        let client = transcript_client.clone();
+
+        let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel::<Vec<i16>>(128);
+        let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
+        let shared_speaker_transcript = speaker_transcript.clone();
+
+        let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<()>();
+
+        tokio::spawn(async move {
+            let fut = async {
+                let input_stream = stream! {
+                    while let Some(raw) = speaker_voice_rx.recv().await {
+                        let reshape = slice_i16_to_u8(&raw);
+                        yield Ok(AudioStream::AudioEvent(AudioEvent::builder().audio_chunk(Blob::new(reshape)).build()));
+                    }
+                };
+                let output = transcript_client
+                    .start_stream_transcription()
+                    .language_code(lang) //LanguageCode::EnGb
+                    .media_sample_rate_hertz(16000)
+                    .media_encoding(MediaEncoding::Pcm)
+                    .audio_stream(input_stream.into())
+                    .send()
+                    .await
+                    .map_err(|e| StreamTranscriptionError::EstablishStreamError(Box::new(e)))?;
+
+                let output_stream = to_stream(output);
+                output_stream
+                    .try_for_each(|text| async {
+                        let _ = shared_speaker_transcript.send(text);
+                        Ok(())
+                    })
+                    .await?;
+                Ok(()) as anyhow::Result<()>
+            };
+            select! {
+                res = fut => {
+                    if let Err(e) = res {
+                        println!("Error: {:?}", e);
+                    }
+                }
+                _ = drop_rx => {}
+            }
+        });
+
+        Ok(Self {
+            client,
+            speaker_voice_channel,
+            speaker_transcript,
+            drop_handler: Some(drop_handler)
+        })
+    }
+}
+
+#[allow(dead_code)]
+fn slice_i16_to_u8(slice: &[i16]) -> Vec<u8> {
+    slice
+        .iter()
+        .flat_map(|&sample| {
+            [(sample >> 8) as u8, sample as u8]
+        })
+        .collect()
+}
+
+impl Drop for AWS_ASR {
+    fn drop(&mut self) {
+        if let Some(drop_handler) = self.drop_handler.take() {
+            let _ = drop_handler.send(());
+        }
+    }
+}
+
 #[async_trait]
 impl ASR for AWS_ASR {
     async fn frame(&mut self, frame: &[i16]) -> anyhow::Result<()> {
-
+        Ok(self.speaker_voice_channel.send(frame.to_vec()).await?)
     }
 
     fn subscribe(&mut self) -> Receiver<Event> {
-
+        self.speaker_transcript.subscribe()
+    }
+}
+
+#[allow(dead_code)]
+fn to_stream(
+    mut output: StartStreamTranscriptionOutput,
+) -> impl Stream<Item = Result<Event, StreamTranscriptionError>> {
+    stream! {
+        while let Some(event) = output
+            .transcript_result_stream
+            .recv()
+            .await
+            .map_err(|e| StreamTranscriptionError::TranscriptResultStreamError(Box::new(e)))? {
+            match event {
+                TranscriptResultStream::TranscriptEvent(transcript_event) => {
+                    let Some(transcript) = transcript_event.transcript else {
+                        continue
+                    };
+
+                    for result in transcript.results.unwrap_or_default() {
+                        let Some(alternatives) = result.alternatives else {
+                            continue
+                        };
+                        let Some(first_alternative) = alternatives.first() else {
+                            continue
+                        };
+                        let Some(text) = &first_alternative.transcript else {
+                            continue
+                        };
+                        let evt = Event {
+                            transcript: text.clone(),
+                            is_final: !result.is_partial,
+                        };
+                        yield Ok(evt);
+                    }
+                }
+                _ => yield Err(StreamTranscriptionError::Unknown),
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+enum StreamTranscriptionError {
+    EstablishStreamError(Box<dyn Error + Send + Sync>),
+    TranscriptResultStreamError(Box<dyn Error + Send + Sync>),
+    Unknown,
+}
+
+impl Display for StreamTranscriptionError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            StreamTranscriptionError::EstablishStreamError(e) => {
+                write!(f, "EstablishStreamError: {}", e)
+            }
+            StreamTranscriptionError::TranscriptResultStreamError(e) => {
+                write!(f, "TranscriptResultStreamError: {}", e)
+            }
+            StreamTranscriptionError::Unknown => write!(f, "Unknown"),
+        }
+    }
+}
+
+impl Error for StreamTranscriptionError {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            StreamTranscriptionError::EstablishStreamError(e) => Some(e.as_ref()),
+            StreamTranscriptionError::TranscriptResultStreamError(e) => Some(e.as_ref()),
+            StreamTranscriptionError::Unknown => None,
+        }
     }
 }
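For orientation, a minimal caller sketch for the new AWS_ASR, using only the signatures visible in this diff; the module path crate::asr::aws, the demo function, and the silent test frame are illustrative assumptions, not part of the commit.

// Hypothetical in-crate caller: push i16 PCM frames in, read transcript Events back.
use aws_sdk_transcribestreaming::types::LanguageCode;
use crate::asr::ASR;               // trait providing frame() and subscribe()
use crate::asr::aws::AWS_ASR;      // assumed module path for the file above

async fn demo_transcribe() -> anyhow::Result<()> {
    let mut asr = AWS_ASR::from_env(LanguageCode::EnGb).await?;
    let mut events = asr.subscribe();    // tokio broadcast Receiver<Event>
    asr.frame(&[0i16; 1600]).await?;     // one 100 ms frame of silence at 16 kHz (illustrative)
    if let Ok(evt) = events.recv().await {
        println!("{} (final: {})", evt.transcript, evt.is_final);
    }
    Ok(())
}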
src/lesson.rs  CHANGED
@@ -1,41 +1,32 @@
-use async_stream::stream;
 use aws_config::SdkConfig;
 use aws_sdk_polly::primitives::ByteStream;
 use aws_sdk_polly::types::{Engine, OutputFormat, SpeechMarkType, VoiceId};
-use aws_sdk_transcribestreaming::
-use aws_sdk_transcribestreaming::primitives::Blob;
-use aws_sdk_transcribestreaming::types::{
-    AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream,
-};
+use aws_sdk_transcribestreaming::types::{LanguageCode};
 use futures_util::future::try_join;
-use futures_util::{Stream, TryStreamExt};
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
-use std::
-use std::fmt::{Display, Formatter};
+use std::fmt::{Display};
 use std::io::BufRead;
 use std::sync::{Arc, Weak};
 use tokio::sync::RwLock;
 
 use tokio::select;
+use crate::asr::Event;
 
 #[derive(Clone, Debug)]
 pub struct LessonsManager {
     translate_client: aws_sdk_translate::Client,
     polly_client: aws_sdk_polly::Client,
-    transcript_client: aws_sdk_transcribestreaming::Client,
     lessons: Arc<RwLock<BTreeMap<u32, Lesson>>>,
 }
 
 impl LessonsManager {
     pub(crate) fn new(sdk_config: &SdkConfig) -> Self {
-        let transcript_client = aws_sdk_transcribestreaming::Client::new(sdk_config);
         let translate_client = aws_sdk_translate::Client::new(sdk_config);
         let polly_client = aws_sdk_polly::Client::new(sdk_config);
         LessonsManager {
             translate_client,
             polly_client,
-            transcript_client,
             lessons: Arc::new(RwLock::new(BTreeMap::new())),
         }
     }
@@ -78,11 +69,11 @@ impl Lesson
         }
     }
 
-    pub(crate) fn
-        self.inner.speaker_voice_channel.
+    pub(crate) async fn send(&self, frame: Vec<i16>) -> anyhow::Result<()> {
+        Ok(self.inner.speaker_voice_channel.send(frame).await?)
     }
 
-    pub(crate) fn transcript_channel(&self) -> tokio::sync::broadcast::Receiver<
+    pub(crate) fn transcript_channel(&self) -> tokio::sync::broadcast::Receiver<Event> {
        self.inner.speaker_transcript.subscribe()
    }
 }
@@ -99,56 +90,17 @@ impl From<InnerLesson> for Lesson
 struct InnerLesson {
     parent: LessonsManager,
     speaker_lang: LanguageCode,
-    speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<
-    speaker_transcript: tokio::sync::broadcast::Sender<
+    speaker_voice_channel: tokio::sync::mpsc::Sender<Vec<i16>>,
+    speaker_transcript: tokio::sync::broadcast::Sender<Event>,
     lang_lessons: RwLock<BTreeMap<String, Weak<InnerLangLesson>>>,
     drop_handler: Option<tokio::sync::oneshot::Sender<Signal>>,
 }
 
 impl InnerLesson {
     fn new(parent: LessonsManager, speaker_lang: LanguageCode) -> InnerLesson {
-        let (speaker_transcript, _) = tokio::sync::broadcast::channel::<
-        let shared_speaker_transcript = speaker_transcript.clone();
+        let (speaker_transcript, _) = tokio::sync::broadcast::channel::<Event>(128);
         let (speaker_voice_channel, mut speaker_voice_rx) = tokio::sync::mpsc::channel(128);
         let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<Signal>();
-        let transcript_client = parent.transcript_client.clone();
-        let shared_speak_lang = speaker_lang.clone();
-
-        tokio::spawn(async move {
-            let fut = async {
-                let input_stream = stream! {
-                    while let Some(raw) = speaker_voice_rx.recv().await {
-                        yield Ok(AudioStream::AudioEvent(AudioEvent::builder().audio_chunk(Blob::new(raw)).build()));
-                    }
-                };
-                let output = transcript_client
-                    .start_stream_transcription()
-                    .language_code(shared_speak_lang) //LanguageCode::EnGb
-                    .media_sample_rate_hertz(16000)
-                    .media_encoding(MediaEncoding::Pcm)
-                    .audio_stream(input_stream.into())
-                    .send()
-                    .await
-                    .map_err(|e| StreamTranscriptionError::EstablishStreamError(Box::new(e)))?;
-
-                let output_stream = to_stream(output);
-                output_stream
-                    .try_for_each(|text| async {
-                        let _ = shared_speaker_transcript.send(text);
-                        Ok(())
-                    })
-                    .await?;
-                Ok(()) as Result<(), StreamTranscriptionError>
-            };
-            select! {
-                res = fut => {
-                    if let Err(e) = res {
-                        println!("Error: {:?}", e);
-                    }
-                }
-                _ = drop_rx => {}
-            }
-        });
 
         InnerLesson {
             parent,
@@ -220,10 +172,10 @@ impl LangLesson
         let (drop_handler, drop_rx) = tokio::sync::oneshot::channel::<Signal>();
         tokio::spawn(async move {
             let fut = async {
-                while let Ok(
+                while let Ok(evt) = transcript_rx.recv().await {
                     let output = translate_client
                         .translate_text()
-                        .text(
+                        .text(evt.transcript)
                         .source_language_code(shared_speaker_lang.as_str())
                         .target_language_code(shared_lang.clone())
                         .send()
@@ -375,31 +327,6 @@ impl Drop for InnerVoiceLesson
     }
 }
 
-fn to_stream(
-    mut output: StartStreamTranscriptionOutput,
-) -> impl Stream<Item = Result<String, StreamTranscriptionError>> {
-    stream! {
-        while let Some(event) = output
-            .transcript_result_stream
-            .recv()
-            .await
-            .map_err(|e| StreamTranscriptionError::TranscriptResultStreamError(Box::new(e)))? {
-            match event {
-                TranscriptResultStream::TranscriptEvent(transcript_event) => {
-                    let transcript = transcript_event.transcript.expect("transcript");
-                    for result in transcript.results.unwrap_or_default() {
-                        if !result.is_partial {
-                            let first_alternative = &result.alternatives.as_ref().expect("should have")[0];
-                            let slice = first_alternative.transcript.as_ref().expect("should have");
-                            yield Ok(slice.clone());
-                        }
-                    }
-                }
-                _ => yield Err(StreamTranscriptionError::Unknown),
-            }
-        }
-    }
-}
 
 // {"time":180,"type":"viseme","value":"r"}
 #[derive(Debug, Deserialize, Clone, Serialize)]
@@ -449,34 +376,3 @@ async fn synthesize_speech(
         .collect();
     Ok((parsed, audio.audio_stream))
 }
-
-#[derive(Debug)]
-enum StreamTranscriptionError {
-    EstablishStreamError(Box<dyn Error + Send + Sync>),
-    TranscriptResultStreamError(Box<dyn Error + Send + Sync>),
-    Unknown,
-}
-
-impl Display for StreamTranscriptionError {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            StreamTranscriptionError::EstablishStreamError(e) => {
-                write!(f, "EstablishStreamError: {}", e)
-            }
-            StreamTranscriptionError::TranscriptResultStreamError(e) => {
-                write!(f, "TranscriptResultStreamError: {}", e)
-            }
-            StreamTranscriptionError::Unknown => write!(f, "Unknown"),
-        }
-    }
-}
-
-impl Error for StreamTranscriptionError {
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
-        match self {
-            StreamTranscriptionError::EstablishStreamError(e) => Some(e.as_ref()),
-            StreamTranscriptionError::TranscriptResultStreamError(e) => Some(e.as_ref()),
-            StreamTranscriptionError::Unknown => None,
-        }
-    }
-}
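The speaker-side surface of Lesson now reduces to the two methods added above; a minimal sketch of how a caller inside the crate might drive them (the pump_frame helper is illustrative, not part of the commit):

// Hypothetical in-crate helper: raw i16 PCM goes in via send(), transcript
// Events come back out of the broadcast channel from transcript_channel().
async fn pump_frame(lesson: &Lesson, pcm: Vec<i16>) -> anyhow::Result<()> {
    let mut transcripts = lesson.transcript_channel(); // broadcast::Receiver<Event>
    lesson.send(pcm).await?;                           // forwarded to the ASR voice channel
    if let Ok(evt) = transcripts.recv().await {
        tracing::debug!("speaker transcript: {}", evt.transcript);
    }
    Ok(())
}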
src/main.rs  CHANGED
@@ -25,8 +25,6 @@ use tracing::debug;
 use tracing_subscriber::{fmt, prelude::*, EnvFilter};
 
 use crate::{config::*, lesson::*};
-#[cfg(feature = "whisper")]
-use crate::whisper::*;
 
 mod config;
 mod lesson;
@@ -105,30 +103,17 @@ async fn stream_speaker(
     let prompt = query.prompt.clone().unwrap_or_default();
 
     ws.on_upgrade(|mut socket| async move {
-        let _origin_tx = lesson.voice_channel();
         let mut transcribe_rx = lesson.transcript_channel();
-        #[cfg(feature = "whisper")]
-        let mut whisper = asr::whisper::whisper_asr::CONTEXT.create_handler(&SETTINGS.whisper, prompt)
-            .expect("failed to create whisper");
-        #[cfg(feature = "whisper")]
-        let mut whisper_transcribe_rx = whisper.subscribe();
         loop {
             select! {
-                // w = whisper_transcribe_rx.recv() => {
-                //     let Ok(_txt) = w else {
-                //         // TODO: handle msg
-                //         continue
-                //     };
-                // }
                 msg = socket.next() => {
                     match msg.as_ref() {
                         Some(Ok(Message::Binary(bin))) => {
-
-                            let
-
-
-
-                            // }
+                            let frame = u8_to_i16(bin);
+                            if let Err(e) = lesson.send(frame).await {
+                                tracing::warn!("failed to send voice: {}", e);
+                                break;
+                            }
                         },
                         Some(Ok(_)) => {
                             tracing::warn!("Other: {:?}", msg);
@@ -145,9 +130,9 @@ async fn stream_speaker(
                     }
                 },
                 output = transcribe_rx.recv() => {
-                    if let Ok(
-                        tracing::trace!("Transcribed: {}", transcript);
-                        let evt = LiveLessonTextEvent::Transcription { text: transcript
+                    if let Ok(evt) = output {
+                        tracing::trace!("Transcribed: {}", evt.transcript);
+                        let evt = LiveLessonTextEvent::Transcription { text: evt.transcript };
                         let json = serde_json::to_string(&evt).expect("failed to serialize");
                         let _ = socket.send(Message::Text(json)).await.expect("failed to send");
                     }
@@ -207,8 +192,8 @@ async fn stream_listener(
         loop {
             select! {
                 transcript = transcript_rx.recv() => {
-                    if let Ok(
-                        let evt = LiveLessonTextEvent::Transcription { text: transcript };
+                    if let Ok(evt) = transcript {
+                        let evt = LiveLessonTextEvent::Transcription { text: evt.transcript };
                         match serde_json::to_string(&evt) {
                             Ok(json) => {
                                 tracing::debug!("Transcribed: {}", json);
@@ -250,3 +235,14 @@ async fn stream_listener(
         }
     })
 }
+
+fn u8_to_i16(input: &[u8]) -> Vec<i16> {
+    input
+        .chunks_exact(2)
+        .map(|chunk| {
+            let mut buf = [0u8; 2];
+            buf.copy_from_slice(chunk);
+            i16::from_le_bytes(buf)
+        })
+        .collect::<Vec<i16>>()
+}
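The new u8_to_i16 helper assumes the websocket delivers 16-bit little-endian PCM; a small check of that framing (a hypothetical test, not part of this commit):

#[cfg(test)]
mod pcm_frame_tests {
    use super::u8_to_i16;

    #[test]
    fn decodes_little_endian_sample_pairs() {
        // 0x0001 and 0xFF7F are the little-endian encodings of 1 and -129;
        // chunks_exact(2) silently drops a trailing odd byte (0xAA here).
        assert_eq!(u8_to_i16(&[0x01, 0x00, 0x7F, 0xFF, 0xAA]), vec![1, -129]);
    }
}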
whisper/src/handler.rs  CHANGED
@@ -77,16 +77,6 @@ impl std::error::Error for Error {
     }
 }
 
-fn u8_to_i16(input: &[u8]) -> Vec<i16> {
-    input
-        .chunks_exact(2)
-        .map(|chunk| {
-            let mut buf = [0u8; 2];
-            buf.copy_from_slice(chunk);
-            i16::from_le_bytes(buf)
-        })
-        .collect::<Vec<i16>>()
-}
 
 #[derive(Clone, Debug)]
 pub enum Output {
@@ -206,11 +196,6 @@ impl WhisperHandler {
     pub async fn send_i16(&mut self, data: Vec<i16>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
         self.tx.send(data).await
     }
-
-    pub async fn send_bytes(&mut self, data: Vec<u8>) -> Result<(), mpsc::error::SendError<Vec<i16>>> {
-        let i16_data = u8_to_i16(&data);
-        self.send_i16(i16_data).await
-    }
 }
 
 #[allow(dead_code)]