Spaces:
Sleeping
Sleeping
clean
Browse files- src/lesson.rs +28 -2
- src/main.rs +0 -140
- static/index.html +2 -2
src/lesson.rs
CHANGED
@@ -4,12 +4,13 @@ use std::collections::BTreeMap;
|
|
4 |
use async_stream::stream;
|
5 |
use aws_config::SdkConfig;
|
6 |
use aws_sdk_polly::types::VoiceId;
|
|
|
7 |
use aws_sdk_transcribestreaming::primitives::Blob;
|
8 |
use aws_sdk_transcribestreaming::types::{AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream};
|
9 |
-
use futures_util::{StreamExt, TryStreamExt};
|
10 |
|
11 |
use tokio::select;
|
12 |
-
use crate::
|
13 |
|
14 |
#[derive(Clone, Debug)]
|
15 |
pub struct LessonsManager {
|
@@ -385,3 +386,28 @@ impl Drop for InnerVoiceLesson {
|
|
385 |
}
|
386 |
}
|
387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
use async_stream::stream;
|
5 |
use aws_config::SdkConfig;
|
6 |
use aws_sdk_polly::types::VoiceId;
|
7 |
+
use aws_sdk_transcribestreaming::operation::start_stream_transcription::StartStreamTranscriptionOutput;
|
8 |
use aws_sdk_transcribestreaming::primitives::Blob;
|
9 |
use aws_sdk_transcribestreaming::types::{AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream};
|
10 |
+
use futures_util::{Stream, StreamExt, TryStreamExt};
|
11 |
|
12 |
use tokio::select;
|
13 |
+
use crate::StreamTranscriptionError;
|
14 |
|
15 |
#[derive(Clone, Debug)]
|
16 |
pub struct LessonsManager {
|
|
|
386 |
}
|
387 |
}
|
388 |
|
389 |
+
|
390 |
+
fn to_stream(mut output: StartStreamTranscriptionOutput) -> impl Stream<Item=Result<String, StreamTranscriptionError>> {
|
391 |
+
stream! {
|
392 |
+
while let Some(event) = output
|
393 |
+
.transcript_result_stream
|
394 |
+
.recv()
|
395 |
+
.await
|
396 |
+
.map_err(|e| StreamTranscriptionError::TranscriptResultStreamError(Box::new(e)))? {
|
397 |
+
match event {
|
398 |
+
TranscriptResultStream::TranscriptEvent(transcript_event) => {
|
399 |
+
let transcript = transcript_event.transcript.expect("transcript");
|
400 |
+
for result in transcript.results.unwrap_or_default() {
|
401 |
+
if !result.is_partial {
|
402 |
+
let first_alternative = &result.alternatives.as_ref().expect("should have")[0];
|
403 |
+
let slice = first_alternative.transcript.as_ref().expect("should have");
|
404 |
+
yield Ok(slice.clone());
|
405 |
+
}
|
406 |
+
}
|
407 |
+
}
|
408 |
+
otherwise => yield Err(StreamTranscriptionError::Unknown),
|
409 |
+
}
|
410 |
+
}
|
411 |
+
}
|
412 |
+
}
|
413 |
+
|
src/main.rs
CHANGED
@@ -57,48 +57,6 @@ enum ReplyEvent {
|
|
57 |
}
|
58 |
|
59 |
|
60 |
-
/// Transcribes an audio file to text.
|
61 |
-
/// # Arguments
|
62 |
-
///
|
63 |
-
/// * `-a AUDIO_FILE` - The name of the audio file.
|
64 |
-
/// It must be a WAV file, which is converted to __pcm__ format for Amazon Transcribe.
|
65 |
-
/// Amazon transcribe also supports __ogg-opus__ and __flac__ formats.
|
66 |
-
/// * `[-r REGION]` - The Region in which the client is created.
|
67 |
-
/// If not supplied, uses the value of the **AWS_REGION** environment variable.
|
68 |
-
/// If the environment variable is not set, defaults to **us-west-2**.
|
69 |
-
/// * `[-v]` - Whether to display additional information.
|
70 |
-
async fn stream_process(translate_client: aws_sdk_translate::Client,
|
71 |
-
polly_client: aws_sdk_polly::Client,
|
72 |
-
transcript_client: aws_sdk_transcribestreaming::Client,
|
73 |
-
mut rx: Receiver<Vec<u8>>,
|
74 |
-
tx: Sender<ReplyEvent>) -> Result<(), StreamTranscriptionError> {
|
75 |
-
|
76 |
-
let input_stream = stream! {
|
77 |
-
while let Some(raw) = rx.recv().await {
|
78 |
-
yield Ok(AudioStream::AudioEvent(AudioEvent::builder().audio_chunk(Blob::new(raw)).build()));
|
79 |
-
}
|
80 |
-
};
|
81 |
-
|
82 |
-
let output = transcript_client
|
83 |
-
.start_stream_transcription()
|
84 |
-
.language_code(LanguageCode::ZhCn)//LanguageCode::EnGb
|
85 |
-
.media_sample_rate_hertz(16000)
|
86 |
-
.media_encoding(MediaEncoding::Pcm)
|
87 |
-
.audio_stream(input_stream.into())
|
88 |
-
.send()
|
89 |
-
.await
|
90 |
-
.map_err(|e| StreamTranscriptionError::EstablishStreamError(Box::new(e)))?;
|
91 |
-
|
92 |
-
let output_stream = to_stream(output);
|
93 |
-
|
94 |
-
output_stream
|
95 |
-
.flat_map(|res| {
|
96 |
-
process(translate_client.clone(), polly_client.clone(), res)
|
97 |
-
})
|
98 |
-
.try_for_each(|reply| tx.send(reply).map_err(|e| StreamTranscriptionError::Shutdown))
|
99 |
-
.await
|
100 |
-
}
|
101 |
-
|
102 |
async fn translate(client: &aws_sdk_translate::Client, transcript: Option<String>, source_lang_code: Option<String>) -> Option<String> {
|
103 |
let res = client.translate_text()
|
104 |
.set_text(transcript)
|
@@ -122,9 +80,6 @@ async fn synthesize(client: &aws_sdk_polly::Client, transcript: String) -> Optio
|
|
122 |
|
123 |
#[derive(Clone)]
|
124 |
struct Context {
|
125 |
-
translate_client: aws_sdk_translate::Client,
|
126 |
-
polly_client: aws_sdk_polly::Client,
|
127 |
-
transcript_client: aws_sdk_transcribestreaming::Client,
|
128 |
lessons_manager: LessonsManager,
|
129 |
}
|
130 |
|
@@ -153,13 +108,7 @@ async fn main() -> Result<(), std::io::Error> {
|
|
153 |
}
|
154 |
|
155 |
let shared_config = aws_config::from_env().region(region_provider).load().await;
|
156 |
-
let transcript_client = aws_sdk_transcribestreaming::Client::new(&shared_config);
|
157 |
-
let translate_client = aws_sdk_translate::Client::new(&shared_config);
|
158 |
-
let polly_client = aws_sdk_polly::Client::new(&shared_config);
|
159 |
let ctx = Context {
|
160 |
-
translate_client,
|
161 |
-
polly_client,
|
162 |
-
transcript_client,
|
163 |
lessons_manager: LessonsManager::new(&shared_config),
|
164 |
};
|
165 |
|
@@ -170,7 +119,6 @@ async fn main() -> Result<(), std::io::Error> {
|
|
170 |
.show_files_listing()
|
171 |
.index_file("index.html"),
|
172 |
)
|
173 |
-
.at("/translate", get(stream_translate))
|
174 |
.at("/ws/lesson-speaker", get(stream_speaker))
|
175 |
.at("/ws/lesson-listener", get(stream_listener))
|
176 |
.at("lesson-speaker", StaticFileEndpoint::new("./static/index.html"))
|
@@ -284,70 +232,6 @@ async fn stream_listener(ctx: Data<&Context>, query: Query<LessonListenerQuery>,
|
|
284 |
})
|
285 |
}
|
286 |
|
287 |
-
#[handler]
|
288 |
-
async fn stream_translate(ctx: Data<&Context>, ws: WebSocket) -> impl IntoResponse {
|
289 |
-
let translate_client = ctx.translate_client.clone();
|
290 |
-
let polly_client = ctx.polly_client.clone();
|
291 |
-
let transcript_client = ctx.transcript_client.clone();
|
292 |
-
ws.on_upgrade(|mut socket| async move {
|
293 |
-
let (origin_tx, origin_rx) = channel::<Vec<u8>>(128);
|
294 |
-
let (translate_tx, mut translate_rx) = channel::<ReplyEvent>(128);
|
295 |
-
let stream_fut = stream_process(
|
296 |
-
translate_client,
|
297 |
-
polly_client,
|
298 |
-
transcript_client,
|
299 |
-
origin_rx,
|
300 |
-
translate_tx);
|
301 |
-
|
302 |
-
let ws_fut = async {
|
303 |
-
loop {
|
304 |
-
select! {
|
305 |
-
msg = socket.next() => {
|
306 |
-
match msg.as_ref() {
|
307 |
-
Some(Ok(Message::Binary(bin))) => {
|
308 |
-
origin_tx.send(bin.to_vec()).await.expect("failed to send");
|
309 |
-
},
|
310 |
-
Some(Ok(_)) => {
|
311 |
-
println!("Other: {:?}", msg);
|
312 |
-
},
|
313 |
-
Some(Err(e)) => {
|
314 |
-
println!("Error: {:?}", e);
|
315 |
-
},
|
316 |
-
None => {
|
317 |
-
socket.close().await.expect("failed to close");
|
318 |
-
println!("Other: {:?}", msg);
|
319 |
-
break;
|
320 |
-
}
|
321 |
-
}
|
322 |
-
},
|
323 |
-
output = translate_rx.recv() => {
|
324 |
-
if let Some(reply) = output {
|
325 |
-
match reply {
|
326 |
-
ReplyEvent::Transcribed(transcript) => {
|
327 |
-
println!("Transcribed: {}", transcript);
|
328 |
-
socket.send(Message::Text(transcript)).await.expect("failed to send");
|
329 |
-
},
|
330 |
-
ReplyEvent::Translated(translated) => {
|
331 |
-
println!("Translated: {}", translated);
|
332 |
-
socket.send(Message::Text(translated)).await.expect("failed to send");
|
333 |
-
},
|
334 |
-
ReplyEvent::Synthesized(raw) => {
|
335 |
-
println!("Synthesized: {:?}", raw.len());
|
336 |
-
socket.send(Message::Binary(raw)).await.expect("failed to send");
|
337 |
-
},
|
338 |
-
}
|
339 |
-
}
|
340 |
-
},
|
341 |
-
}
|
342 |
-
}
|
343 |
-
};
|
344 |
-
select! {
|
345 |
-
_ = stream_fut => {},
|
346 |
-
_ = ws_fut => {},
|
347 |
-
}
|
348 |
-
})
|
349 |
-
}
|
350 |
-
|
351 |
#[derive(Debug)]
|
352 |
enum StreamTranscriptionError {
|
353 |
EstablishStreamError(Box<dyn Error + Send + Sync>),
|
@@ -379,30 +263,6 @@ impl Error for StreamTranscriptionError {
|
|
379 |
}
|
380 |
}
|
381 |
|
382 |
-
fn to_stream(mut output: StartStreamTranscriptionOutput) -> impl Stream<Item=Result<String, StreamTranscriptionError>> {
|
383 |
-
stream! {
|
384 |
-
while let Some(event) = output
|
385 |
-
.transcript_result_stream
|
386 |
-
.recv()
|
387 |
-
.await
|
388 |
-
.map_err(|e| StreamTranscriptionError::TranscriptResultStreamError(Box::new(e)))? {
|
389 |
-
match event {
|
390 |
-
TranscriptResultStream::TranscriptEvent(transcript_event) => {
|
391 |
-
let transcript = transcript_event.transcript.expect("transcript");
|
392 |
-
for result in transcript.results.unwrap_or_default() {
|
393 |
-
if !result.is_partial {
|
394 |
-
let first_alternative = &result.alternatives.as_ref().expect("should have")[0];
|
395 |
-
let slice = first_alternative.transcript.as_ref().expect("should have");
|
396 |
-
yield Ok(slice.clone());
|
397 |
-
}
|
398 |
-
}
|
399 |
-
}
|
400 |
-
otherwise => yield Err(StreamTranscriptionError::Unknown),
|
401 |
-
}
|
402 |
-
}
|
403 |
-
}
|
404 |
-
}
|
405 |
-
|
406 |
fn process(translate_client: aws_sdk_translate::Client,
|
407 |
polly_client: aws_sdk_polly::Client,
|
408 |
res: Result<String, StreamTranscriptionError>) -> impl Stream<Item=Result<ReplyEvent, StreamTranscriptionError>> {
|
|
|
57 |
}
|
58 |
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
async fn translate(client: &aws_sdk_translate::Client, transcript: Option<String>, source_lang_code: Option<String>) -> Option<String> {
|
61 |
let res = client.translate_text()
|
62 |
.set_text(transcript)
|
|
|
80 |
|
81 |
#[derive(Clone)]
|
82 |
struct Context {
|
|
|
|
|
|
|
83 |
lessons_manager: LessonsManager,
|
84 |
}
|
85 |
|
|
|
108 |
}
|
109 |
|
110 |
let shared_config = aws_config::from_env().region(region_provider).load().await;
|
|
|
|
|
|
|
111 |
let ctx = Context {
|
|
|
|
|
|
|
112 |
lessons_manager: LessonsManager::new(&shared_config),
|
113 |
};
|
114 |
|
|
|
119 |
.show_files_listing()
|
120 |
.index_file("index.html"),
|
121 |
)
|
|
|
122 |
.at("/ws/lesson-speaker", get(stream_speaker))
|
123 |
.at("/ws/lesson-listener", get(stream_listener))
|
124 |
.at("lesson-speaker", StaticFileEndpoint::new("./static/index.html"))
|
|
|
232 |
})
|
233 |
}
|
234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
#[derive(Debug)]
|
236 |
enum StreamTranscriptionError {
|
237 |
EstablishStreamError(Box<dyn Error + Send + Sync>),
|
|
|
263 |
}
|
264 |
}
|
265 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
fn process(translate_client: aws_sdk_translate::Client,
|
267 |
polly_client: aws_sdk_polly::Client,
|
268 |
res: Result<String, StreamTranscriptionError>) -> impl Stream<Item=Result<ReplyEvent, StreamTranscriptionError>> {
|
static/index.html
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
<head>
|
5 |
<meta charset="utf-8">
|
6 |
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
|
7 |
-
<title>
|
8 |
<meta name="description" content="Google Cloud Speech Recognition with Node and Socket.io">
|
9 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
10 |
|
@@ -13,7 +13,7 @@
|
|
13 |
|
14 |
<body>
|
15 |
<div class="wrapper">
|
16 |
-
<h1>
|
17 |
|
18 |
<audio></audio>
|
19 |
|
|
|
4 |
<head>
|
5 |
<meta charset="utf-8">
|
6 |
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
|
7 |
+
<title>Polyhedron: Realtime Transcribe + Translate + Speech</title>
|
8 |
<meta name="description" content="Google Cloud Speech Recognition with Node and Socket.io">
|
9 |
<meta name="viewport" content="width=device-width, initial-scale=1">
|
10 |
|
|
|
13 |
|
14 |
<body>
|
15 |
<div class="wrapper">
|
16 |
+
<h1>Polyhedron: Realtime Transcribe + Translate + Speech</h1>
|
17 |
|
18 |
<audio></audio>
|
19 |
|