File size: 377 Bytes
cc0b62b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
""" Creates a vocabulary from a tsv file.
"""
import codecs
import example_helper
from torchmoji.create_vocab import VocabBuilder
from torchmoji.word_generator import TweetWordGenerator
# Build and save a vocabulary from the raw tweet dump.
#
# The file is opened with plain 'r' rather than 'rU': codecs.open() forwards
# the mode to the builtin open(), which removed the 'U' flag in Python 3.11
# and raises ValueError for it. codecs.open() opens the underlying file in
# binary mode and decodes it as UTF-8.
with codecs.open('../../twitterdata/tweets.2016-09-01', 'r', 'utf-8') as stream:
    # Everything that consumes the stream stays inside the ``with`` block so
    # the file is still open while it is being read.
    wg = TweetWordGenerator(stream)
    vb = VocabBuilder(wg)
    vb.count_all_words()
    # save_vocab() is called without arguments, so the output location is
    # whatever VocabBuilder uses by default.
    vb.save_vocab()
|