# Spaces:
# Sleeping
# Sleeping
# Read the whole corpus from disk as text, then view it as raw UTF-8 bytes.
with open('text_file.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Iterating a bytes object yields ints in the range 0..255, so list() alone
# produces the list of integer token ids.
tokens = list(text.encode("utf-8"))

print('---')
print("length of text:", len(text))
print('---')
# Debug: print(tokens)
print('---')
# Counted in bytes, so this is >= the character count above whenever the
# text contains non-ASCII characters.
print("length of tokens:", len(tokens))
def get_stats(ids):
    """Count how often each pair of adjacent elements occurs in *ids*.

    Args:
        ids: A sliceable sequence (e.g. a list of ints) of hashable items.

    Returns:
        A dict mapping ``(ids[i], ids[i + 1])`` tuples to the number of
        times that pair appears. Keys are inserted in order of first
        occurrence. Sequences with fewer than two elements yield ``{}``.
    """
    counts = {}
    # zip stops at the shorter input, so pairing ids with ids[1:] walks
    # every consecutive (left, right) pair exactly once.
    for pair in zip(ids, ids[1:]):
        counts[pair] = counts.get(pair, 0) + 1
    return counts
# Tally every adjacent byte pair in the token stream.
stats = get_stats(tokens)
print('---')
# Debug: print(stats)
# Debug: print(sorted(((v, k) for k, v in stats.items()), reverse=True))
print('---')
# Pick the pair with the highest count. On ties max() returns the first
# maximal key it encounters while iterating the dict. max() raises
# ValueError if stats is empty.
top_pair = max(stats, key=stats.get)
print(top_pair)
# Debug: print(chr(224), chr(164))