kyleluoma's picture
Upload 2 files
d922b58 verified
raw
history blame contribute delete
910 Bytes
def make_token_tag(identifier):
    """
    Feature engineering for identifiers: tag each character by its class.

    Each character of the input is mapped (case-insensitively) to one tag:

        "^"  vowel              (a, e, i, o, u)
        "$"  special character  (-, _, @)
        "#"  number             (0-9)
        "+"  consonant          (any other ASCII letter, including y)
        "*"  other              (anything not listed above)

    Args:
        identifier (str): The identifier to tag.
    Returns:
        str: A string of tag characters the same length as the input string.
    """
    # Build one char -> tag table so each character costs a single lookup
    # instead of up to four list scans.
    tag_for = {}
    tag_for.update(dict.fromkeys("aeiou", "^"))
    tag_for.update(dict.fromkeys("-_@", "$"))
    # "0" is included here; it was previously missing and fell through to "*".
    tag_for.update(dict.fromkeys("0123456789", "#"))
    tag_for.update(dict.fromkeys("bcdfghjklmnpqrstvwxyz", "+"))

    # Lowercase per character rather than the whole string: str.lower() can
    # expand a single character into several (e.g. U+0130), which would make
    # the output longer than the input. An expanded result is simply not in
    # the table and is tagged "*".
    return "".join(tag_for.get(c.lower(), "*") for c in identifier)