ai-lab / prep /save_cbow_model.py
ClemSummer's picture
UI Enhancements
f66e5e6
raw
history blame contribute delete
595 Bytes
import os

import gensim.downloader as api
# Fetch the pretrained "glove-twitter-200" vectors through gensim's downloader
# API and keep the returned object for the probes and the save below.
model = api.load("glove-twitter-200")
print("Model loaded.")

# Vocabulary probes: each line prints whether the given string is a key in
# the model's vocabulary (`key_to_index`).
print("new-york" in model.key_to_index)  # expected True if the hyphenated token is present
print("new" in model.key_to_index)  # expected True as well
print("new york" in model.key_to_index)  # expected False: a key containing a space is not expected

# Optional sanity check: show the nearest neighbours of the hyphenated token,
# guarded so a missing key does not abort the script before the save.
if "new-york" in model.key_to_index:
    print(model.most_similar("new-york"))

# Collect every vocabulary key that contains a hyphen and show the first ten.
compound_terms = [key for key in model.key_to_index if "-" in key]
print(f"Sample compound tokens: {compound_terms[:10]}")

# NOTE(review): the vectors loaded above are GloVe, while the output file is
# named "cbow_model" -- the name is kept unchanged because other code may load
# this exact path; confirm whether it should be renamed.
output_path = "../models/cbow_model.kv"

# The path is relative to the current working directory. Create the target
# directory first so the save does not fail (and waste the download) when
# "../models" does not exist yet.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
model.save(output_path)