File size: 11,393 Bytes
95b880a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
#!/usr/bin/env python
# coding: utf-8

# In[1]:


#Install All the Required Dependencies
#!pip3 install torch torchvision torchaudio
#!pip install transformers ipywidgets gradio --upgrade
#!pip install --upgrade transformers accelerate
#!pip install --upgrade gradio
#!pip install nltk
#!pip install jiwer
#!pip install sentencepiece
#!pip install sacremoses
#!pip install soundfile
#!pip install librosa numpy jiwer nltk
#!pip install --upgrade pip 
#!pip install huggingface_hub


# In[2]:


#Import Required Libraries
from transformers import pipeline
from jiwer import wer
from transformers import VitsModel, AutoTokenizer, set_seed
import torch
import soundfile as sf
import librosa
from scipy.spatial.distance import euclidean
import numpy as np
import string
import os
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
import string
import numpy as np
import librosa
from scipy.spatial.distance import euclidean
import string


# In[3]:


import nltk

# Download the WordNet corpora up front; 'omw-1.4' is optional and only
# matters when WordNet's multilingual features are used.
for _nltk_resource in ('wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource)

# Show where NLTK currently searches for data, then additionally register a
# local ./nltk_data directory as a search location.
print(nltk.data.path)
nltk.data.path.append('./nltk_data')


# In[4]:


#Define all Utility Functions
# Function to compute BLEU score
def compute_bleu(reference_text, predicted_text):
    """
    Score one predicted translation against one reference with sentence BLEU.

    Both texts are echoed to stdout, split on whitespace (no lowercasing or
    punctuation stripping is applied here), and scored with ``sentence_bleu``
    using ``SmoothingFunction().method1`` so that sparse n-gram matches are
    smoothed.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: BLEU score rounded to two decimal places (float).
    """
    print("The Reference Text = ", reference_text)
    print("The Predicted Text = ",predicted_text)

    # sentence_bleu takes a list of references, each itself a token list.
    references = [reference_text.split()]
    hypothesis = predicted_text.split()

    score = sentence_bleu(
        references,
        hypothesis,
        smoothing_function=SmoothingFunction().method1,
    )
    return round(score, 2)
# Function to compute Word Error Rate (WER)
def compute_wer(reference_text, predicted_text):
    """
    Return the Word Error Rate (WER) of a predicted translation.

    Before scoring, both texts are lowercased and every character listed in
    ``string.punctuation`` is removed.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: WER score rounded to two decimal places (float).
    """
    # One translation table that deletes punctuation, shared by both texts.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    normalized_reference = reference_text.lower().translate(strip_punctuation)
    normalized_prediction = predicted_text.lower().translate(strip_punctuation)

    return round(wer(normalized_reference, normalized_prediction), 2)

# Function to compute METEOR score
def compute_meteor(reference_text, predicted_text):
    """
    Return the METEOR score of a predicted translation against one reference.

    Both texts are lowercased, stripped of every character listed in
    ``string.punctuation``, and split on whitespace before being passed to
    ``meteor_score``.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: METEOR score rounded to two decimal places (float).
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)

    # Apply the same normalisation + tokenisation to reference and prediction.
    reference_words, predicted_words = (
        text.lower().translate(strip_punctuation).split()
        for text in (reference_text, predicted_text)
    )

    # meteor_score takes a list of tokenised references and one tokenised hypothesis.
    return round(meteor_score([reference_words], predicted_words), 2)

# Function to compute Mel Cepstral Distance (MCD)
def compute_mcd(ground_truth_audio_path, predicted_audio_path):
    """
    Computes a Mel Cepstral Distance (MCD) style score between two audio files.

    Both files are loaded at 16 kHz, 13 MFCCs are extracted per frame, the
    longer MFCC sequence is truncated to the length of the shorter one (no
    time alignment such as DTW is performed), and the mean per-frame
    Euclidean distance is scaled by ``10 / ln(10)``.

    :param ground_truth_audio_path: Path to the ground truth audio file.
    :param predicted_audio_path: Path to the predicted audio file.
    :return: MCD score rounded to two decimal places (float).
    :raises ValueError: If either file yields no MFCC frames to compare.
    """
    sample_rate = 16000
    n_mfcc = 13

    # Loading both files at the same explicit rate guarantees matching
    # sampling rates, so no separate runtime check is required.
    y_true, _ = librosa.load(ground_truth_audio_path, sr=sample_rate)
    y_pred, _ = librosa.load(predicted_audio_path, sr=sample_rate)

    # Transpose so that each row is one frame of n_mfcc coefficients.
    mfcc_true = librosa.feature.mfcc(y=y_true, sr=sample_rate, n_mfcc=n_mfcc).T
    mfcc_pred = librosa.feature.mfcc(y=y_pred, sr=sample_rate, n_mfcc=n_mfcc).T

    # Compare only the frames both signals have.
    min_frames = min(len(mfcc_true), len(mfcc_pred))
    if min_frames == 0:
        # Previously this case surfaced as a ZeroDivisionError further down.
        raise ValueError(
            "Cannot compute MCD: at least one audio file produced no MFCC frames."
        )

    # Per-frame Euclidean distance, computed for all frames at once.
    frame_distances = np.linalg.norm(
        mfcc_true[:min_frames] - mfcc_pred[:min_frames], axis=1
    )
    # Accumulate the mean in float64, as the original Python-float sum did.
    mcd = (10.0 / np.log(10)) * np.mean(frame_distances, dtype=np.float64)

    return round(float(mcd), 2)


# In[5]:


#Define Translation and Synthesis Function
def translate_transformers(modelName, sourceLangText):
    """
    Translate English text to Yoruba, synthesize speech for it, and score it.

    Steps:
      1. Build a ``translation_en_to_yo`` pipeline for ``modelName`` and
         translate ``sourceLangText``.
      2. Synthesize the translation with the ``facebook/mms-tts-yor`` VITS
         model and write it to ``ttsOutput.wav`` in the working directory.
      3. Score the translation against a hard-coded Yoruba reference sentence
         (BLEU, WER, METEOR), and score the synthesized audio against
         ``gt_ttsOutput.wav`` in the working directory (MCD).

    NOTE: the reference sentence is fixed inside this function, so the text
    metrics are only meaningful when ``sourceLangText`` is the English source
    of that reference. Both models are loaded on every call.

    :param modelName: Hugging Face model id of the translation model.
    :param sourceLangText: English text to translate.
    :return: Tuple ``(translated_text, bleu, wer, meteor, mcd, wav_path)``.
             ``mcd`` is ``None`` when the MCD computation failed (for example
             when the ground-truth audio file is missing).
    """
    tts_model_id = "facebook/mms-tts-yor"
    tts_output_file = 'ttsOutput.wav'

    # --- Text-to-text translation ---
    translation_pipeline = pipeline('translation_en_to_yo', model = modelName, max_length=500)
    translated_text = translation_pipeline(sourceLangText)
    translated_text_target = translated_text[0]['translation_text']

    # --- Text-to-speech for the translated text ---
    ttsModel = VitsModel.from_pretrained(tts_model_id)
    tokenizer = AutoTokenizer.from_pretrained(tts_model_id)
    ttsInputs = tokenizer(translated_text_target, return_tensors="pt")
    set_seed(555)  # make deterministic
    with torch.no_grad():
        ttsOutput = ttsModel(**ttsInputs).waveform
    # Convert the tensor to a numpy array (first item of the batch)
    ttsWaveform = ttsOutput.numpy()[0]
    # Save the waveform at the model's own sampling rate rather than a
    # hard-coded value, so the file always plays back at the correct speed.
    sf.write(tts_output_file, ttsWaveform, ttsModel.config.sampling_rate)

    # --- Reference text for the text metrics ---
    # Alternative references used in other experiments:
    # Clinical text:
    #ground_truth_text = "Àrùn jẹjẹrẹ ọmú jẹ́ ọ̀kan pàtàkì lára ohun tó ń ṣàkóbá fún ìlera gbogbo ènìyàn ní Nàìjíríà, ó sì jẹ́ ọ̀kan pàtàkì lára ohun tó ń fa ikú àwọn obìnrin tí àrùn jẹjẹrẹ ń pa lórílẹ̀-èdè náà."
    # News text:
    #ground_truth_text = "Wọ́n ní ìgbà àkọ́kọ́ nìyí tí irú ìwà ipá bẹ́ẹ̀ máa wáyé ní ìpínlẹ̀ Ondo."
    # Religion text (currently active):
    ground_truth_text = "Àwọn aposteli, àwọn wòlíì, àwọn ajíhìnrere, àwọn olùṣọ́-àgùntàn àti àwọn olùkọ́."
    predicted_text = translated_text_target

    # Compute bleu_score
    bleu_score = compute_bleu(ground_truth_text, predicted_text)
    print(f"Bleu Score (BLEU): {bleu_score:.2f}")

    # Compute WER
    wer_score = compute_wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {wer_score:.2f}")

    # Compute METEOR
    meteor = compute_meteor(ground_truth_text, predicted_text)
    print(f"METEOR Score: {meteor:.2f}")

    # Paths to the audio files for MCD computation in the current directory
    ground_truth_audio = os.path.join(os.getcwd(), "gt_ttsOutput.wav")
    predicted_audio = os.path.join(os.getcwd(), tts_output_file)

    # Compute Mel Cepstral Distance (MCD). This is best-effort: a failure is
    # reported on stdout and must not prevent the translation from being
    # returned. `mcd` is pre-set so the return statement below never hits an
    # unbound local when the computation fails.
    mcd = None
    try:
        mcd = compute_mcd(ground_truth_audio, predicted_audio)
    except Exception as e:
        print(f"Error computing MCD: {e}")
    else:
        print(f"Mel Cepstral Distance (MCD): {mcd:.2f}")

    return translated_text_target,bleu_score,wer_score,meteor,mcd,tts_output_file


# In[6]:


#Define User Interface Function using Gradio and IPython Libraries
import gradio as gr
from IPython.display import Audio
# Gradio UI: the user picks a translation model and enters English text; the
# outputs are wired, in order, to the six values returned by
# translate_transformers (text, BLEU, WER, METEOR, MCD, audio file path).
# NOTE(review): value="text" / value="number" below look like they were meant
# as type hints; as written they appear to pre-fill the output boxes with
# those literal words — confirm against the Gradio Textbox documentation.
interface = gr.Interface(
    fn=translate_transformers,
    inputs=[
        gr.Dropdown(["Davlan/byt5-base-eng-yor-mt", #Exp1
                     "Davlan/m2m100_418M-eng-yor-mt", #Exp2
                     "Davlan/mbart50-large-eng-yor-mt", #Exp3
                     "Davlan/mt5_base_eng_yor_mt", #Exp4
                     "omoekan/opus-tatoeba-eng-yor", #Exp5
                     "masakhane/afrimt5_en_yor_news", #Exp6
                     "masakhane/afrimbart_en_yor_news", #Exp7
                     "masakhane/afribyt5_en_yor_news", #Exp8
                     "masakhane/byt5_en_yor_news", #Exp9
                     "masakhane/mt5_en_yor_news", #Exp10
                     "masakhane/mbart50_en_yor_news", #Exp11
                     "masakhane/m2m100_418M_en_yor_news", #Exp12
                     "masakhane/m2m100_418M_en_yor_rel_news", #Exp13
                     "masakhane/m2m100_418M_en_yor_rel_news_ft", #Exp14
                     "masakhane/m2m100_418M_en_yor_rel", #Exp15
                     "dabagyan/menyo_en2yo", #Exp16
                     # Models below are currently disabled:
                     #"facebook/nllb-200-distilled-600M", #Exp17
                     #"facebook/nllb-200-3.3B", #Exp18
                     #"facebook/nllb-200-1.3B", #Exp19
                     #"facebook/nllb-200-distilled-1.3B",  #Exp20
                     #"keithhon/nllb-200-3.3B" #Exp21
                     #"CohereForAI/aya-101" #Exp22
                     "facebook/m2m100_418M", #Exp17
                     #"facebook/m2m100_1.2B",#Exp18
                     #"facebook/m2m100-12B-avg-5-ckpt", #Exp19
                     "google/mt5-base", #Exp20
                     "google/byt5-large" #Exp21
                     ], 
                     label="Select Finetuned Eng2Yor Translation Model"),
        gr.Textbox(lines=2, placeholder="Enter English Text Here...", label="English Text")  
    ],
    outputs=[
        gr.Textbox(value="text", label="Translated Yoruba Text"),
        gr.Textbox(value="number", label="BLEU SCORE"),
        gr.Textbox(value="number", label="WER(WORD ERROR RATE) SCORE - The Lower the Better"),
        gr.Textbox(value="number", label="METEOR SCORE"),
        # Label typo fixed: "CESPRAL" -> "CEPSTRAL".
        gr.Textbox(value="number", label="MCD(MEL CEPSTRAL DISTANCE) SCORE"),
        gr.Audio(type="filepath", label="Click to Generate Yoruba Speech from the Translated Text")
    ],
    title="ASPMIR-MACHINE-TRANSLATION-TESTBED FOR LOW RESOURCED AFRICAN LANGUAGES",
    description="{This Tool Allows Developers and Researchers to Carry Out Experiments on Low Resourced African Languages with State-of-the-Art Pretrained or Finetuned Models.}"
)
#interface.launch(share=True)


# In[7]:


# Launch the Gradio app only when this file is run as a script, so importing
# the module does not start a server.
# NOTE(review): share=True presumably requests a public Gradio share link —
# confirm this is intended for the deployment environment.
if __name__ == "__main__":
    interface.launch(share=True)