Add support for African voices.
- app.py +8 -1
- src/lookups.py +2 -1
- src/synthesize.py +24 -0
app.py
CHANGED
@@ -63,6 +63,7 @@ type=['wav'])
         finetuned_mms4 = synth_mms(tts_text, "khof312/mms-tts-spa-female")
     if tts_lang=="lin":
         finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-lin-female")
+        finetuned_africanvoices = synth_africanvoices(tts_text, models[tts_lang]['africanvoices'])

         #vc_mms
         #vc_coqui
@@ -152,14 +153,20 @@ type=['wav'])
     "### Fine Tuned"
     row1 = st.columns([1,1,2])
     row2 = st.columns([1,1,2])
+    row3 = st.columns([1,1,2])

     row1[0].write("**Model**")
     row1[1].write("**Configuration**")
     row1[2].write("**Audio**")

     row2[0].write(f"Meta MMS")
-    row2[1].write("[khof312 -
+    row2[1].write("[khof312 - female](https://huggingface.co/khof312/mms-tts-lin-female)")
     row2[2].audio(finetuned_mms1[0], sample_rate = finetuned_mms1[1])
+
+
+    row3[0].write(f"African voices")
+    row3[1].write("[African Voices]()")
+    row3[2].audio(finetuned_africanvoices[0], sample_rate = finetuned_africanvoices[1])

     st.divider()
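Like the other synth_* helpers, synth_africanvoices returns None when no model is configured, while the new row indexes finetuned_africanvoices[0] directly. A minimal sketch of a guard that only renders the row when synthesis succeeded; the variable names come from the hunks above, but the guard itself is an assumption and not part of this change:

    # Hypothetical guard: skip the African Voices row when synthesis is unavailable.
    result = synth_africanvoices(tts_text, models[tts_lang].get('africanvoices'))
    if result is not None:
        wav, sampling_rate = result
        row3[0].write("African voices")
        row3[1].write("[African Voices]()")
        row3[2].audio(wav, sample_rate=sampling_rate)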
src/lookups.py
CHANGED
@@ -82,7 +82,8 @@ models = {
         'coqui': 'tts_models/lin/openbible/vits', # Sampling rate: 22050
         'espeakng': None,
         'toucan': 'Lingala (lin)',
-        'piper': None
+        'piper': None,
+        'africanvoices': 'cmu_lin_ope',
         },
     'mos':{
         'mms': 'facebook/mms-tts-mos',
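The 'africanvoices' key sits alongside the other engine entries in each language block, so the app can resolve a voice name per language the same way it does for the existing engines. A small sketch of how the Lingala entry resolves, using only the keys visible in the hunk above:

    # Keys taken from the 'lin' entry above; other languages follow the same shape.
    lin_entry = {
        'espeakng': None,
        'toucan': 'Lingala (lin)',
        'piper': None,
        'africanvoices': 'cmu_lin_ope',
    }
    voice = lin_entry['africanvoices']   # 'cmu_lin_ope', passed on to synth_africanvoices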
src/synthesize.py
CHANGED
@@ -88,7 +88,31 @@ def synth_espeakng(text:str, model:str):
         return wav, sampling_rate
     else:
         return None
+
+def synth_africanvoices(text:str, model:str):
+    '''
+    Use flite (African Voices) to synthesize text.
+
+    Inputs:
+        text: Text to synthesize
+        model: Model code
+    Returns:
+        Streaming Wav and sampling rate.
+    '''
+    if model is not None:
+
+        subprocess.run(['flite', '-voice', f'{model}.flitevox', text, 'test.wav'])
+        #esng = espeakng.Speaker()
+        #esng.voice = model
+        #esng.say(text, export_path="test.wav")

+        sampling_rate, wav = wavfile.read('test.wav')
+        os.remove("test.wav")
+
+        #wav = tts.tts(text=text)
+        return wav, sampling_rate
+    else:
+        return None

 def synth_toucan(text:str, model:str):
     '''
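The new helper shells out to flite, writes a wav file in the working directory, reads it back with scipy, and deletes it. Because the fixed test.wav name can collide when several sessions synthesize at once, a per-call temporary file is one possible variant. The sketch below keeps the same flite invocation as the diff above; it is an assumption rather than part of this change, and synth_africanvoices_tmp is a hypothetical name:

    import os
    import subprocess
    import tempfile

    from scipy.io import wavfile


    def synth_africanvoices_tmp(text: str, model: str):
        '''Sketch: same flite call as synth_africanvoices, but writing to a per-call temp file.'''
        if model is None:
            return None
        fd, path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        try:
            # Same flite arguments as above: the voice file via -voice, then the text and output wav path.
            subprocess.run(['flite', '-voice', f'{model}.flitevox', text, path], check=True)
            sampling_rate, wav = wavfile.read(path)
        finally:
            os.remove(path)
        return wav, sampling_rate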