File size: 803 Bytes
d8b92ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Load the entire input file into memory as one string.
with open('text_file.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Encode the text as UTF-8 and expand the resulting bytes object into a
# list of ints (each in 0..255); iterating over bytes already yields ints.
tokens = list(text.encode("utf-8"))

# Report the size in characters first, then in bytes. The two numbers
# differ whenever the text contains non-ASCII characters.
print('---')
print("length of text:", len(text))
print('---')
# print(tokens)  # debug: dump the raw byte values
print('---')
print("length of tokens:", len(tokens))

def get_stats(ids):
    """Count how often each pair of adjacent elements occurs in *ids*.

    Args:
        ids: A sliceable sequence (e.g. a list of ints) of hashable items.

    Returns:
        A dict mapping ``(left, right)`` tuples to their occurrence count,
        keyed in order of first appearance. Empty when *ids* has fewer
        than two elements.
    """
    tally = {}
    # Zipping the sequence with itself shifted by one yields each
    # consecutive (left, right) pair exactly once.
    for left, right in zip(ids, ids[1:]):
        key = (left, right)
        if key in tally:
            tally[key] += 1
        else:
            tally[key] = 1
    return tally

# Count how often each pair of adjacent byte values occurs in the tokens.
stats = get_stats(tokens)
print('---')
# Debug: uncomment to inspect the counts, most frequent first.
# print(stats)
# print(sorted(((v, k) for k, v in stats.items()), reverse=True))

print('---')
# Pick the pair with the highest count. On ties max() returns the first
# maximal key it meets, i.e. the pair that appeared earliest in the input.
# `default=None` prevents a ValueError when the input has fewer than two
# bytes and therefore no pairs at all; None is printed in that case.
top_pair = max(stats, key=stats.get, default=None)
print(top_pair)

# print(chr(224), chr(164))  # debug: code points 224 and 164 as characters