Spaces:
Sleeping
Sleeping
File size: 871 Bytes
4ee33aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import re
from pypinyin import pinyin, lazy_pinyin, Style
def _to_tone3_line(text):
    """Return *text* as space-separated TONE3 pinyin syllables.

    Neutral tones are written with the digit 5
    (``neutral_tone_with_five=True``).
    """
    return ' '.join(
        lazy_pinyin(text, style=Style.TONE3, neutral_tone_with_five=True)
    )


# Build the pinyin training corpus: one line of pinyin per source sentence.
# The encoding is given explicitly because the transcripts contain Chinese
# text and the platform default encoding is not guaranteed to be UTF-8.
# NOTE(review): assumes both input files are UTF-8 encoded -- confirm.
with open('data/bpe_train-set.txt', 'w', encoding='utf-8') as out:
    # aishell3
    with open('data/label_train-set.txt', 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            # The first five lines are skipped (presumably a file header --
            # verify against the dataset).
            if i < 5:
                continue
            # The third '|'-separated field holds the sentence; the '%' and
            # '$' markers embedded in it are removed before conversion.
            text = (
                line.strip()
                .split('|')[2]
                .replace('% ', '')
                .replace('$', '')
                .replace('%', '')
            )
            out.write(_to_tone3_line(text) + '\n')
    # data-baker 1w
    with open('data/000001-010000.txt', 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            # Only even-indexed lines are used; odd-indexed lines are
            # skipped (presumably they hold the reference pinyin for the
            # preceding sentence -- verify against the dataset).
            if i % 2 == 1:
                continue
            # The second tab-separated field holds the sentence.
            text = line.strip().split('\t')[1]
            # Remove '#' characters and digits (presumably prosody marks
            # such as '#1' -- TODO confirm).
            text = re.sub(r'[#\d]', '', text)
            out.write(_to_tone3_line(text) + '\n')
|