pragsGit committed on
Commit
baa5fe1
·
verified ·
1 Parent(s): 74dbc56

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import pipeline
3
+ from transformers import VitsModel, VitsTokenizer
4
+ import numpy as np
5
+ import gradio as gr
6
+
7
# Output sample format for the generated audio: 16-bit signed PCM.
# max_range is the largest value that format can hold and is used as the
# scale factor when converting a floating-point waveform to integers.
target_dtype = np.int16
max_range = np.iinfo(target_dtype).max

# Run speech recognition on the first CUDA device when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Whisper (base) speech-recognition pipeline, built once at import time and
# shared by every request.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=device,
)
16
+
17
def translate(audio):
    """Run the Whisper pipeline on *audio* and return the recognised text.

    Args:
        audio: Input forwarded unchanged to the module-level ``pipe``; the
            Gradio interfaces in this file pass a path to an audio file.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    # NOTE(review): the recogniser is asked for Hindi ("hi") output while the
    # TTS checkpoint loaded in this file is facebook/mms-tts-deu (German) --
    # confirm which target language is actually intended.
    outputs = pipe(
        audio,
        max_new_tokens=256,
        generate_kwargs={"task": "transcribe", "language": "hi"},
    )
    # The original computed ``outputs`` but never returned it, so callers
    # always received None; hand back the recognised text instead.
    return outputs["text"]
23
+
24
# Text-to-speech checkpoint; the model and its tokenizer must come from the
# same checkpoint, so the name is spelled out once and reused.
_TTS_CHECKPOINT = "facebook/mms-tts-deu"

model = VitsModel.from_pretrained(_TTS_CHECKPOINT)
tokenizer = VitsTokenizer.from_pretrained(_TTS_CHECKPOINT)
26
+
27
def synthesise(text):
    """Convert *text* to speech with the module-level VITS model.

    Args:
        text: The text to speak; it is tokenised with the module-level
            ``tokenizer``.

    Returns:
        The ``"waveform"`` entry of the model output, as a torch tensor.
    """
    token_ids = tokenizer(text, return_tensors="pt")["input_ids"]
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        result = model(token_ids)
    return result["waveform"]
33
+
34
def speech_to_speech_translation(audio):
    """Turn input speech into synthesised speech of the recognised text.

    The audio is passed through ``translate`` to obtain text, the text is
    passed through ``synthesise`` to obtain a waveform, and the waveform is
    converted to 16-bit integer samples.

    Args:
        audio: Input forwarded unchanged to ``translate``.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is a 1-D
        ``np.int16`` array, the form accepted by a ``gr.Audio`` output with
        ``type="numpy"``.
    """
    translated_text = translate(audio)
    waveform = synthesise(translated_text).numpy()

    # Drop the leading batch axis so a mono 1-D signal is returned; a 2-D
    # array would be read by Gradio as (samples, channels).
    waveform = waveform.reshape(-1)
    # Clamp to [-1, 1] before scaling: out-of-range floats would otherwise
    # wrap around when cast to int16 and produce loud clicks.
    waveform = np.clip(waveform, -1.0, 1.0)
    samples = (waveform * max_range).astype(target_dtype)

    # 16000 is presumably the sampling rate of the TTS checkpoint --
    # TODO confirm against model.config.sampling_rate.
    return 16000, samples
39
+
40
# Two front-ends over the same speech-to-speech function: one records from
# the microphone, the other accepts an uploaded file. Both hand the function
# a file path and play back the returned (rate, samples) pair.
mic_translate = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
)
file_translate = gr.Interface(
    fn=speech_to_speech_translation,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Audio(label="Generated Speech", type="numpy"),
)

# Present the two interfaces as tabs inside a single Blocks app.
tab_interfaces = [mic_translate, file_translate]
tab_titles = ["Microphone", "Audio File"]
with gr.Blocks() as demo:
    gr.TabbedInterface(tab_interfaces, tab_titles)

# debug=True keeps the process in the foreground and prints errors.
demo.launch(debug=True)