|
def make_token_tag(identifier):
    """Tag each character of an identifier by its character class.

    Feature engineering for identifiers: every character is mapped to a
    single tag character, compared case-insensitively:

        "^"  vowel (a, e, i, o, u)
        "$"  special character (-, _, @)
        "#"  ASCII digit (0-9)
        "+"  consonant (any other ASCII letter, including y)
        "*"  anything else

    Args:
        identifier (str): The identifier to tag.

    Returns:
        str: A string of tag characters the same length as the input string.
    """
    # One lookup table instead of four list scans per character.
    tag_by_char = {}
    tag_by_char.update(dict.fromkeys("aeiou", "^"))
    tag_by_char.update(dict.fromkeys("-_@", "$"))
    # "0" is included here; omitting it made zero fall through to "*".
    tag_by_char.update(dict.fromkeys("0123456789", "#"))
    tag_by_char.update(dict.fromkeys("bcdfghjklmnpqrstvwxyz", "+"))

    # Lowercase per character rather than lowercasing the whole string first:
    # some code points (e.g. U+0130) lowercase to more than one character,
    # which would make the output longer than the input. A multi-character
    # lowercase form simply misses the table and is tagged "*".
    return "".join(tag_by_char.get(char.lower(), "*") for char in identifier)