File size: 910 Bytes
d922b58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def make_token_tag(identifier):
    """
    Tag each character of an identifier with its character class.

    Feature engineering for identifiers: every character is classified
    case-insensitively and replaced by one tag character:

        "^"  vowel (a, e, i, o, u)
        "$"  special character ("-", "_", "@")
        "#"  digit (0-9)
        "+"  consonant (any other letter a-z, including "y")
        "*"  anything else

    Args:
        identifier (str): The identifier to tag.

    Returns:
        str: A string of tag characters the same length as the input string.
    """
    # One lookup table instead of four list scans per character.
    tag_by_char = {}
    tag_by_char.update(dict.fromkeys("aeiou", "^"))
    tag_by_char.update(dict.fromkeys("-_@", "$"))
    # "0" is included so that zero is tagged as a digit like 1-9.
    tag_by_char.update(dict.fromkeys("0123456789", "#"))
    tag_by_char.update(dict.fromkeys("bcdfghjklmnpqrstvwxyz", "+"))

    # Lowercase per character rather than the whole string: str.lower() can
    # expand a single character into several (e.g. U+0130), which would make
    # the result longer than the input. Such characters fall through to "*".
    return "".join(tag_by_char.get(c.lower(), "*") for c in identifier)