# Spaces: Running  (page-header residue from the web scrape; not part of the module)
import enum
import subprocess
import sys
from typing import List, Tuple, Union

import spacy
import pyinflect  # noqa: F401  (imported for its side effect: provides the `._.inflect` token extension)
# Fine-grained POS tags for verbs (tag | description | morphological features | example):
# BES  | auxiliary “be”                            |                                            | Let it **be**.
# HVS  | forms of “have”                           |                                            | I**’ve** seen the Queen
# MD   | verb, modal auxiliary                     | VerbType=mod                               | This **could** work.
# VB   | verb, base form                           | VerbForm=inf                               | I want to **go**.
# VBD  | verb, past tense                          | VerbForm=fin Tense=past                    | This **was** a sentence.
# VBG  | verb, gerund or present participle        | VerbForm=part Tense=pres Aspect=prog       | I am **going**.
# VBN  | verb, past participle                     | VerbForm=part Tense=past Aspect=perf       | The treasure was **lost**.
# VBP  | verb, non-3rd person singular present     | VerbForm=fin Tense=pres                    | I **want** to go.
# VBZ  | verb, 3rd person singular present         | VerbForm=fin Tense=pres Number=sing Person=3 | He **wants** to go.
@enum.unique
class Tense(enum.Enum):
    """Verbal tenses described as POS-tag patterns.

    Each member's value is a dict with three entries:

    * ``'aux'``  -- tags accepted for the auxiliary verb; a ``None`` entry
      means the pattern also matches when there is no auxiliary.
    * ``'main'`` -- tags accepted for the main (root) verb.
    * ``'tobe'`` -- "to be" templates keyed by noun tag (``'NN'`` singular,
      ``'NNS'`` plural). The ``{}`` placeholder is filled by the caller
      (presumably with a negation such as ``" not"`` -- confirm with caller).

    Members are matched in declaration order, so order matters when two
    patterns could accept the same (aux, main) pair.

    ``@enum.unique`` guards against two members accidentally sharing the
    same pattern dict, which ``Enum`` would otherwise silently turn into
    an alias that is skipped during iteration.
    """

    simple_present = {
        # 'VBZ' covers "does go"; 'VBP' covers the non-3rd-person
        # do-support form "do go", which is the same tense.
        'aux': [None, 'VBZ', 'VBP'],
        'main': ['VBZ', 'VBP', 'VB'],
        'tobe': {'NN': 'is{}', 'NNS': 'are{}'},
    }
    simple_past = {
        'aux': [None, 'VBD'],
        'main': ['VBD', 'VB'],
        'tobe': {'NN': 'was{}', 'NNS': 'were{}'},
    }
    future_simple = {
        'aux': ['MD'],
        'main': ['VB'],
        'tobe': {'NN': 'will{} be', 'NNS': 'will{} be'},
    }
    present_cont = {
        'aux': ['VBP', 'VBZ'],
        'main': ['VBG'],
        'tobe': {'NN': 'is{} being', 'NNS': 'are{} being'},
    }
    past_cont = {
        'aux': ['VBD'],
        'main': ['VBG'],
        'tobe': {'NN': 'was{} being', 'NNS': 'were{} being'},
    }
    present_perfect = {
        'aux': ['VBP', 'VBZ'],
        'main': ['VBN'],
        'tobe': {'NN': 'has{} been', 'NNS': 'have{} been'},
    }
class Parser:
    """Thin wrapper around a spaCy pipeline plus small grammar helpers.

    The helpers convert verbs to their past participle, map subject pronouns
    to object pronouns, read the grammatical number of a direct object and
    classify the verbal tense from POS tags.
    """

    # Lower-cased subject pronoun -> object pronoun.
    _SUBJ2OBJ = {
        "i": "me",
        "you": "you",
        "we": "us",
        "they": "them",
        "he": "him",
        "she": "her",
        "it": "it",
    }

    def __init__(self) -> None:
        """Create the parser and load the ``en_core_web_sm`` spaCy model."""
        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(self, model: str) -> None:
        """Load the spaCy pipeline *model*, downloading it first if needed.

        Args:
            model: Name of the spaCy model package to load.

        Raises:
            OSError: If the model still cannot be loaded after the download
                attempt (raised by the second ``spacy.load`` call).
        """
        self.parser = None
        try:
            self.parser = spacy.load(model)
        except OSError:
            # spaCy reports a missing model as OSError. Catching only that
            # keeps KeyboardInterrupt/SystemExit from triggering a download.
            print(f"* Downloading {model} model...")
            # Use the running interpreter (not whatever "python" is on PATH)
            # and an argument list instead of a shell string. The download's
            # stdout is captured and discarded; a failed download surfaces
            # through the spacy.load call below.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )
            self.parser = spacy.load(model)

    def verb2participle(self, verb: str) -> str:
        """Return the past-participle (``VBN``) inflection of *verb*.

        Only the first token produced by the pipeline is inflected. The
        ``._.inflect`` token extension comes from the ``pyinflect`` package
        imported at the top of the file.

        NOTE(review): an empty *verb* yields no first token, so the ``[0]``
        index will fail; the inflection may presumably be ``None`` when
        pyinflect has no form for the token -- confirm against callers.
        """
        token = self.parser(verb)[0]
        return token._.inflect('VBN')

    def subj2obj(self, pronoun: str) -> Union[str, None]:
        """Convert a subject pronoun to its object form.

        The lookup is case-insensitive.

        Args:
            pronoun: Subject pronoun such as ``"I"`` or ``"they"``.

        Returns:
            The object pronoun in lower case, or ``None`` when *pronoun* is
            not a known subject pronoun.
        """
        return self._SUBJ2OBJ.get(pronoun.lower())

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[str, str, str]]
    ) -> Union[str, None]:
        """Return the noun tag of the first direct object in *dobj_data*.

        Args:
            dobj_data: Sequence of 3-tuples whose second item is the
                dependency label and whose third item is the POS tag (the
                first item is ignored here).

        Returns:
            The tag of the first entry labelled ``'dobj'`` with ``'NNP'``
            replaced by ``'NN'`` (so ``'NNP'`` -> ``'NN'`` and ``'NNPS'`` ->
            ``'NNS'``), or ``None`` when there is no ``'dobj'`` entry.
        """
        tag = next((tag for _, dep, tag in dobj_data if dep == 'dobj'), None)
        if tag is None:
            return None
        # Fold proper-noun tags into the common-noun tags.
        return tag.replace('NNP', 'NN')

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[str, str, str, int]]]
    ) -> Union[str, None]:
        """Classify the verbal tense from auxiliary and root verb tags.

        Args:
            verb_data: Three lists ``(aux, neg, root)``. Only the first
                entry of ``aux`` and of ``root`` is inspected, and the tag
                is read from index 2 of that entry. ``neg`` is not used.

        Returns:
            The name of the first ``Tense`` member whose ``'aux'`` list
            contains the auxiliary tag (``None`` when there is no auxiliary)
            and whose ``'main'`` list contains the root tag, or ``None`` if
            no member matches.
        """
        aux, _neg, root = verb_data
        root_tag = root[0][2] if root else None
        aux_tag = aux[0][2] if aux else None
        return next(
            (
                tense.name
                for tense in Tense
                if aux_tag in tense.value['aux']
                and root_tag in tense.value['main']
            ),
            None,
        )