File size: 595 Bytes
79a071f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import os

import gensim.downloader as api
# Load the pretrained "glove-twitter-200" vectors through the gensim downloader.
model = api.load("glove-twitter-200")
print("Model loaded.")

# Probe how multi-word terms are represented in the vocabulary.
has_new_york = "new-york" in model.key_to_index
print(has_new_york)                           # True only if the hyphenated token exists
print("new" in model.key_to_index)            # single-word token; expected to be present
print("new york" in model.key_to_index)       # expected False: presumably no key contains a space

# Optional sanity check: show the nearest neighbours of the hyphenated token.
# Guarded because most_similar() raises KeyError for out-of-vocabulary keys.
if has_new_york:
    print(model.most_similar("new-york"))

# Collect every vocabulary entry containing a hyphen and show a small sample.
compound_terms = [key for key in model.key_to_index if "-" in key]
print(f"Sample compound tokens: {compound_terms[:10]}")

# Persist the vectors for later use. Create the target directory first so the
# save does not fail with FileNotFoundError when "../models" is missing.
# NOTE(review): these are GloVe vectors despite the "cbow" file name; the name
# is left unchanged because other code may load this exact path.
output_path = "../models/cbow_model.kv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
model.save(output_path)