Everton Aleixo
committed on
Commit
·
d669329
1
Parent(s):
9578405
Change asr. Add translation.
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
| 9 |
|
| 10 |
# load speech translation checkpoint
|
| 11 |
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)
|
|
|
|
| 12 |
|
| 13 |
# load text-to-speech checkpoint and speaker embeddings
|
| 14 |
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
|
@@ -21,9 +22,13 @@ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze
|
|
| 21 |
|
| 22 |
|
| 23 |
def translate(audio):
|
| 24 |
-
outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language":"
|
| 25 |
print('outputs', outputs)
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def synthesise(text):
|
|
|
|
| 9 |
|
| 10 |
# load speech translation checkpoint
|
| 11 |
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)
|
| 12 |
+
# load text translation checkpoint on the same device as the ASR pipeline
translation_pipe = pipeline("translation", model="alirezamsh/small100", device=device)
|
| 13 |
|
| 14 |
# load text-to-speech checkpoint and speaker embeddings
|
| 15 |
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def translate(audio):
    """Transcribe speech with the ASR pipeline and translate the text to Portuguese.

    Args:
        audio: Audio input, forwarded unchanged to ``asr_pipe``.

    Returns:
        The ``translation_text`` value of the first result returned by
        ``translation_pipe``.
    """
    # The ASR step is forced to English, so the text handed to the translator
    # is expected to be English.
    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "english"})
    print('outputs', outputs)
    text = outputs["text"]
    # The source language must describe the ASR output; it was previously 'pt',
    # identical to the target language.
    # NOTE(review): confirm English -> Portuguese is the intended direction.
    translation = translation_pipe(text, src_lang='en', tgt_lang='pt')[0]
    print('translation', translation)

    return translation['translation_text']
|
| 32 |
|
| 33 |
|
| 34 |
def synthesise(text):
|