Akjava commited on
Commit
a68d2c7
·
verified ·
1 Parent(s): 9dc01a4

Update js-esm/text_to_arpa.js

Browse files
Files changed (1) hide show
  1. js-esm/text_to_arpa.js +91 -91
js-esm/text_to_arpa.js CHANGED
@@ -1,92 +1,92 @@
1
- import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
2
-
3
-
4
- async function text2text_generation(words,convert_ipa=false) {
5
- const generator = await pipeline('text2text-generation', 'mini-bart-g2p',{quantized: false});
6
- const inputTexts = words;
7
- const options = { max_new_tokens: 100 };
8
- const outputs = await generator(inputTexts, options);
9
-
10
- if (convert_ipa){
11
- const ipas = []
12
- outputs.forEach(output => {
13
- const ipa = arpa_to_ipa(output.generated_text).replace(/\s/g, "")
14
- ipas.push(ipa)
15
- });
16
- return ipas
17
- }else{
18
- return outputs //arpa
19
- }
20
-
21
- }
22
-
23
-
24
- async function textToArpa(cmudict,text,replace_questions=false){
25
- if (replace_questions){
26
- text = text.replaceAll("!",".").replaceAll("?",".")
27
- }
28
- const cleanedString = text.replace(/[^a-zA-Z0-9.,!? ]/g, '');
29
-
30
- const dict = wordsToArpa(cmudict,cleanedString)
31
-
32
-
33
-
34
- const result = dict["result"]
35
- const non_converted = dict["non_converted"]
36
-
37
- let arpa_text = result.join(" ");
38
-
39
- //console.log(non_converted.length)
40
- if (non_converted.length > 0){
41
- console.log("non_converted length = "+non_converted.length)
42
- const arpas = await text2text_generation(non_converted)
43
- console.log(arpas)
44
- for (let i = 0; i < non_converted.length; i++) {
45
- const word = non_converted[i]
46
- const arpa = arpas[i].generated_text
47
- console.log("@"+word,arpa)
48
- arpa_text = arpa_text.replace("@"+word,arpa)
49
- }
50
- }
51
- return arpa_text
52
-
53
- }
54
-
55
- function get_arpa(cmudict,word){
56
- return cmudict[word.toUpperCase()]
57
- }
58
-
59
- function wordsToArpa(cmudict,text){
60
- var keep_words = [",",".","!","?"]
61
- let inputText = text.toUpperCase()
62
- keep_words.forEach(function(key){
63
- inputText = inputText.replaceAll(key," "+key+" ");
64
- });
65
- //console.log(`replaced ${inputText}`)
66
-
67
- let result = []
68
- let non_converted = []
69
- var words = inputText.split(" ")
70
-
71
- words.forEach(word => {
72
-
73
- if (keep_words.includes(word)){//,.!? just keep
74
- result.push(word)
75
- }else if (word ==""){
76
-
77
- }else{
78
- const arpa = get_arpa(cmudict,word)
79
-
80
- if (typeof arpa == "undefined"){
81
- result.push("@"+word)
82
- non_converted.push(word)
83
- }else{
84
- result.push(arpa)
85
- }
86
- }
87
- });
88
-
89
- return {"result":result,"non_converted":non_converted}
90
- }
91
-
92
  export{env,textToArpa}
 
1
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
2
+ env.backends.onnx.logSeverityLevel = 3
3
+
4
/**
 * Promise for the shared grapheme-to-phoneme pipeline.
 * Cached at module level so the model is only loaded once instead of on
 * every call to text2text_generation().
 */
let g2pGeneratorPromise = null;

/**
 * Return the shared 'mini-bart-g2p' text2text-generation pipeline, creating
 * it on first use.
 *
 * @returns {Promise<Function>} Resolves to the pipeline callable.
 */
function loadG2pGenerator() {
  if (g2pGeneratorPromise === null) {
    g2pGeneratorPromise = pipeline('text2text-generation', 'mini-bart-g2p', { quantized: false })
      .catch((err) => {
        // Do not cache a failed load; let the next call try again.
        g2pGeneratorPromise = null;
        throw err;
      });
  }
  return g2pGeneratorPromise;
}

/**
 * Run the G2P model over the given words.
 *
 * @param {string[]|string} words - Input passed unchanged to the pipeline.
 * @param {boolean} [convert_ipa=false] - When true, each generated string is
 *   passed through `arpa_to_ipa` and stripped of whitespace.
 * @returns {Promise<Array>} With `convert_ipa` false: the raw pipeline
 *   outputs (objects exposing `generated_text`). With `convert_ipa` true: an
 *   array of IPA strings.
 * @throws {Error} If `convert_ipa` is true but no `arpa_to_ipa` function is
 *   in scope.
 */
async function text2text_generation(words, convert_ipa = false) {
  // NOTE(review): `arpa_to_ipa` is not defined or imported in this module.
  // Check up front so the caller gets a clear error before the (expensive)
  // generation runs, rather than a ReferenceError afterwards.
  if (convert_ipa && typeof arpa_to_ipa !== 'function') {
    throw new Error('text2text_generation: convert_ipa requires an arpa_to_ipa function, which is not available');
  }

  const generator = await loadG2pGenerator();
  const options = { max_new_tokens: 100 };
  const outputs = await generator(words, options);

  if (!convert_ipa) {
    return outputs; // arpa
  }
  return outputs.map((output) => arpa_to_ipa(output.generated_text).replace(/\s/g, ''));
}
22
+
23
+
24
/**
 * Convert free text into a single space-separated ARPAbet string.
 *
 * Words found in `cmudict` are taken from the dictionary; the remaining
 * words are sent to `text2text_generation` and their generated text is
 * substituted in place.
 *
 * @param {Object} cmudict - Pronunciation table handed to `wordsToArpa`.
 * @param {string} text - Input text. `null`/`undefined` are treated as "".
 * @param {boolean} [replace_questions=false] - When true, "!" and "?" are
 *   replaced with "." before conversion.
 * @returns {Promise<string>} Pronunciations and kept punctuation joined by
 *   single spaces.
 */
async function textToArpa(cmudict, text, replace_questions = false) {
  let source = String(text ?? "");
  if (replace_questions) {
    source = source.replaceAll("!", ".").replaceAll("?", ".");
  }

  // Turn every whitespace run (tabs, newlines, ...) into one space BEFORE
  // stripping unsupported characters; otherwise "hello\nworld" would be
  // fused into the single token "helloworld".
  const cleanedString = source
    .replace(/\s+/g, " ")
    .replace(/[^a-zA-Z0-9.,!? ]/g, "");

  const { result, non_converted } = wordsToArpa(cmudict, cleanedString);
  let arpa_text = result.join(" ");

  if (non_converted.length === 0) {
    return arpa_text;
  }

  console.log("non_converted length = "+non_converted.length);
  const arpas = await text2text_generation(non_converted);
  console.log(arpas);

  // wordsToArpa marks each unknown word as "@WORD" and lists those words in
  // order of appearance, so replacing the first remaining "@WORD" each time
  // hits the matching placeholder.
  non_converted.forEach((word, i) => {
    const arpa = arpas[i].generated_text;
    console.log("@"+word, arpa);
    // Function replacer: the generated text is inserted literally, so "$"
    // sequences in model output are not treated as replacement patterns.
    arpa_text = arpa_text.replace("@"+word, () => arpa);
  });

  return arpa_text;
}
54
+
55
/**
 * Look up the pronunciation of one word.
 *
 * @param {Object} cmudict - Table keyed by upper-case word.
 * @param {string} word - Word in any letter case.
 * @returns {*} The stored entry, or `undefined` when the word is absent.
 */
function get_arpa(cmudict, word) {
  return cmudict[word.toUpperCase()];
}

/**
 * Split text into words and punctuation and convert each word via `cmudict`.
 *
 * The punctuation marks "," "." "!" "?" are kept as their own tokens.
 * Words missing from the dictionary are emitted as "@WORD" placeholders in
 * `result` and also collected, in order of appearance, in `non_converted`.
 *
 * @param {Object} cmudict - Table keyed by upper-case word.
 * @param {string} text - Text to convert; it is upper-cased before lookup.
 * @returns {{result: Array, non_converted: string[]}} Converted tokens and
 *   the list of words that had no dictionary entry.
 */
function wordsToArpa(cmudict, text) {
  const keep_words = [",", ".", "!", "?"];

  // One pass: a token is either a single kept punctuation mark or a run of
  // characters that are neither whitespace nor kept punctuation. Splitting
  // on any whitespace (not just " ") keeps tabs/newlines out of the tokens.
  const tokens = text.toUpperCase().match(/[,.!?]|[^\s,.!?]+/g) ?? [];

  const result = [];
  const non_converted = [];

  for (const token of tokens) {
    if (keep_words.includes(token)) {
      // ,.!? are passed through unchanged
      result.push(token);
      continue;
    }

    const arpa = get_arpa(cmudict, token);
    if (arpa === undefined) {
      result.push(`@${token}`);
      non_converted.push(token);
    } else {
      result.push(arpa);
    }
  }

  return { result, non_converted };
}
91
+
92
  export{env,textToArpa}