File size: 3,469 Bytes
e5fc5ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import enum
import subprocess
import sys
from typing import List, Union, Tuple

import spacy
import pyinflect  # noqa: F401 -- imported for its side effect: registers the `._.inflect` token extension

# BES	auxiliary “be”		Let it **be**.
# HVS	forms of “have”		I**’ve** seen the Queen
# MD	verb, modal auxiliary	VerbType=mod	This **could** work.
# VB	verb, base form	VerbForm=inf	I want to **go**.
# VBD	verb, past tense	VerbForm=fin Tense=past	This **was** a sentence.
# VBG	verb, gerund or present participle	VerbForm=part Tense=pres Aspect=prog	I am **going**.
# VBN	verb, past participle	VerbForm=part Tense=past Aspect=perf	The treasure was **lost**.
# VBP	verb, non-3rd person singular present	VerbForm=fin Tense=pres	I **want** to go.
# VBZ	verb, 3rd person singular present	VerbForm=fin Tense=pres Number=sing Person=3	He **wants** to go.

class Tense(enum.Enum):
    """Verb tenses described as POS-tag patterns.

    Every member's value is a dict with three keys:

    * ``'aux'``  -- tags accepted for the auxiliary verb; a ``None`` entry
      means "no auxiliary present" is also accepted.
    * ``'main'`` -- tags accepted for the main (root) verb.
    * ``'tobe'`` -- format templates keyed by ``'NN'`` / ``'NNS'``, each
      containing one ``{}`` placeholder. How the placeholder is filled is
      up to the caller (not visible in this file).

    Declaration order is significant: ``Parser.get_verbal_tense`` iterates
    the members in order and stops at the first one that matches.
    """

    simple_present = dict(
        aux=[None, 'VBZ'],
        main=['VBZ', 'VBP', 'VB'],
        tobe=dict(NN='is{}', NNS='are{}'),
    )
    simple_past = dict(
        aux=[None, 'VBD'],
        main=['VBD', 'VB'],
        tobe=dict(NN='was{}', NNS='were{}'),
    )
    future_simple = dict(
        aux=['MD'],
        main=['VB'],
        tobe=dict(NN='will{} be', NNS='will{} be'),
    )
    present_cont = dict(
        aux=['VBP', 'VBZ'],
        main=['VBG'],
        tobe=dict(NN='is{} being', NNS='are{} being'),
    )
    past_cont = dict(
        aux=['VBD'],
        main=['VBG'],
        tobe=dict(NN='was{} being', NNS='were{} being'),
    )
    present_perfect = dict(
        aux=['VBP', 'VBZ'],
        main=['VBN'],
        tobe=dict(NN='has{} been', NNS='have{} been'),
    )

class Parser:
    """Wrapper around a spaCy pipeline plus a few small grammar helpers.

    The pipeline is stored in ``self.parser`` and is loaded (and, when
    missing, downloaded) while the instance is being constructed.
    """

    # Subject pronoun -> object pronoun; keys are lower-case.
    _SUBJ_TO_OBJ = {
        "i": "me",
        "you": "you",
        "we": "us",
        "they": "them",
        "he": "him",
        "she": "her",
        "it": "it",
    }

    def __init__(
        self
    ) -> None:
        """Create the parser and load the ``en_core_web_sm`` pipeline."""
        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(
        self,
        model: str
    ) -> None:
        """Load the spaCy pipeline ``model``, downloading it if it is missing.

        Args:
            model: Name of the spaCy pipeline package to load.

        Raises:
            OSError: If the pipeline still cannot be loaded after the
                download attempt.
        """
        try:
            self.parser = spacy.load(model)
        except OSError:
            # spaCy reports a missing pipeline package as OSError. Catching
            # only that keeps Ctrl-C and unrelated bugs from silently
            # triggering a download.
            print(f"* Downloading {model} model...")
            # Argument list + sys.executable: no shell interpolation of
            # `model`, and the package is installed for the interpreter that
            # is actually running. A failed download is not raised here; the
            # second load below surfaces it as the same OSError as before.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )

            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> Union[str, None]:
        """Return the past participle (``VBN``) form of ``verb``.

        Only the first token produced by the pipeline is inflected.

        Args:
            verb: Text whose first token is the verb to inflect.

        Returns:
            The inflected form, or ``None`` when ``verb`` yields no tokens
            or the inflection extension has no form to offer.
        """
        doc = self.parser(verb)
        if len(doc) == 0:
            # Empty / whitespace-only input: nothing to inflect.
            return None
        return doc[0]._.inflect('VBN')

    def subj2obj(
        self,
        pronoun: str
    ) -> Union[str, None]:
        """
        Convert Subject pronouns to Object pronouns.

        The lookup is case-insensitive. Returns ``None`` when ``pronoun``
        is not one of the known subject pronouns.
        """
        return self._SUBJ_TO_OBJ.get(pronoun.lower())

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[str, str, str]]
    ) -> Union[str, None]:
        """Return the noun tag of the first direct object in ``dobj_data``.

        Args:
            dobj_data: Sequence of 3-tuples whose second element is the
                dependency label and whose third element is the POS tag.
                (The first element is ignored here.)

        Returns:
            The tag of the first entry labelled ``'dobj'`` with ``'NNP'``
            replaced by ``'NN'`` (so ``'NNPS'`` becomes ``'NNS'``), or
            ``None`` when there is no such entry.
        """
        tag = next((tag for _, dep, tag in dobj_data if dep == 'dobj'), None)
        if tag is None:
            return None
        # Fold proper-noun tags onto their common-noun counterparts.
        return tag.replace('NNP', 'NN')

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[str,str,str,int]]]
    ) -> Union[str, None]:
        """Return the name of the first ``Tense`` matching ``verb_data``.

        Args:
            verb_data: Three lists, in the order (auxiliaries, negations,
                roots). Only the first entry of the auxiliary and root
                lists is inspected, and only its element at index 2, which
                is compared against the tag lists in ``Tense``. The
                negation list is not used.

        Returns:
            The matching ``Tense`` member's name, or ``None`` if no member
            accepts the auxiliary/root tag combination.
        """
        aux_tokens, _neg_tokens, root_tokens = verb_data

        # A missing auxiliary/root is represented by None, which some Tense
        # patterns list explicitly as an accepted auxiliary.
        root_tag = root_tokens[0][2] if root_tokens else None
        aux_tag = aux_tokens[0][2] if aux_tokens else None

        for tense in Tense:
            pattern = tense.value
            if aux_tag in pattern['aux'] and root_tag in pattern['main']:
                return tense.name

        return None