Spaces:
Running
Running
"""Provide advanced parsing abilities for ParenMatch and other extensions. | |
HyperParser uses PyParser. PyParser mostly gives information on the | |
proper indentation of code. HyperParser gives additional information on | |
the structure of code. | |
""" | |
from keyword import iskeyword | |
import string | |
from idlelib import pyparse | |
# all ASCII chars that may be in an identifier | |
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_") | |
# all ASCII chars that may be the first char of an identifier | |
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_") | |
# lookup table for whether 7-bit ASCII chars are valid in a Python identifier | |
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)] | |
# lookup table for whether 7-bit ASCII chars are valid as the first | |
# char in a Python identifier | |
_IS_ASCII_ID_FIRST_CHAR = \ | |
[(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)] | |
class HyperParser: | |
def __init__(self, editwin, index): | |
"To initialize, analyze the surroundings of the given index." | |
self.editwin = editwin | |
self.text = text = editwin.text | |
parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth) | |
def index2line(index): | |
return int(float(index)) | |
lno = index2line(text.index(index)) | |
if not editwin.prompt_last_line: | |
for context in editwin.num_context_lines: | |
startat = max(lno - context, 1) | |
startatindex = repr(startat) + ".0" | |
stopatindex = "%d.end" % lno | |
# We add the newline because PyParse requires a newline | |
# at end. We add a space so that index won't be at end | |
# of line, so that its status will be the same as the | |
# char before it, if should. | |
parser.set_code(text.get(startatindex, stopatindex)+' \n') | |
bod = parser.find_good_parse_start( | |
editwin._build_char_in_string_func(startatindex)) | |
if bod is not None or startat == 1: | |
break | |
parser.set_lo(bod or 0) | |
else: | |
r = text.tag_prevrange("console", index) | |
if r: | |
startatindex = r[1] | |
else: | |
startatindex = "1.0" | |
stopatindex = "%d.end" % lno | |
# We add the newline because PyParse requires it. We add a | |
# space so that index won't be at end of line, so that its | |
# status will be the same as the char before it, if should. | |
parser.set_code(text.get(startatindex, stopatindex)+' \n') | |
parser.set_lo(0) | |
# We want what the parser has, minus the last newline and space. | |
self.rawtext = parser.code[:-2] | |
# Parser.code apparently preserves the statement we are in, so | |
# that stopatindex can be used to synchronize the string with | |
# the text box indices. | |
self.stopatindex = stopatindex | |
self.bracketing = parser.get_last_stmt_bracketing() | |
# find which pairs of bracketing are openers. These always | |
# correspond to a character of rawtext. | |
self.isopener = [i>0 and self.bracketing[i][1] > | |
self.bracketing[i-1][1] | |
for i in range(len(self.bracketing))] | |
self.set_index(index) | |
def set_index(self, index): | |
"""Set the index to which the functions relate. | |
The index must be in the same statement. | |
""" | |
indexinrawtext = (len(self.rawtext) - | |
len(self.text.get(index, self.stopatindex))) | |
if indexinrawtext < 0: | |
raise ValueError("Index %s precedes the analyzed statement" | |
% index) | |
self.indexinrawtext = indexinrawtext | |
# find the rightmost bracket to which index belongs | |
self.indexbracket = 0 | |
while (self.indexbracket < len(self.bracketing)-1 and | |
self.bracketing[self.indexbracket+1][0] < self.indexinrawtext): | |
self.indexbracket += 1 | |
if (self.indexbracket < len(self.bracketing)-1 and | |
self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and | |
not self.isopener[self.indexbracket+1]): | |
self.indexbracket += 1 | |
def is_in_string(self): | |
"""Is the index given to the HyperParser in a string?""" | |
# The bracket to which we belong should be an opener. | |
# If it's an opener, it has to have a character. | |
return (self.isopener[self.indexbracket] and | |
self.rawtext[self.bracketing[self.indexbracket][0]] | |
in ('"', "'")) | |
def is_in_code(self): | |
"""Is the index given to the HyperParser in normal code?""" | |
return (not self.isopener[self.indexbracket] or | |
self.rawtext[self.bracketing[self.indexbracket][0]] | |
not in ('#', '"', "'")) | |
def get_surrounding_brackets(self, openers='([{', mustclose=False): | |
"""Return bracket indexes or None. | |
If the index given to the HyperParser is surrounded by a | |
bracket defined in openers (or at least has one before it), | |
return the indices of the opening bracket and the closing | |
bracket (or the end of line, whichever comes first). | |
If it is not surrounded by brackets, or the end of line comes | |
before the closing bracket and mustclose is True, returns None. | |
""" | |
bracketinglevel = self.bracketing[self.indexbracket][1] | |
before = self.indexbracket | |
while (not self.isopener[before] or | |
self.rawtext[self.bracketing[before][0]] not in openers or | |
self.bracketing[before][1] > bracketinglevel): | |
before -= 1 | |
if before < 0: | |
return None | |
bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) | |
after = self.indexbracket + 1 | |
while (after < len(self.bracketing) and | |
self.bracketing[after][1] >= bracketinglevel): | |
after += 1 | |
beforeindex = self.text.index("%s-%dc" % | |
(self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) | |
if (after >= len(self.bracketing) or | |
self.bracketing[after][0] > len(self.rawtext)): | |
if mustclose: | |
return None | |
afterindex = self.stopatindex | |
else: | |
# We are after a real char, so it is a ')' and we give the | |
# index before it. | |
afterindex = self.text.index( | |
"%s-%dc" % (self.stopatindex, | |
len(self.rawtext)-(self.bracketing[after][0]-1))) | |
return beforeindex, afterindex | |
# the set of built-in identifiers which are also keywords, | |
# i.e. keyword.iskeyword() returns True for them | |
_ID_KEYWORDS = frozenset({"True", "False", "None"}) | |
def _eat_identifier(cls, str, limit, pos): | |
"""Given a string and pos, return the number of chars in the | |
identifier which ends at pos, or 0 if there is no such one. | |
This ignores non-identifier eywords are not identifiers. | |
""" | |
is_ascii_id_char = _IS_ASCII_ID_CHAR | |
# Start at the end (pos) and work backwards. | |
i = pos | |
# Go backwards as long as the characters are valid ASCII | |
# identifier characters. This is an optimization, since it | |
# is faster in the common case where most of the characters | |
# are ASCII. | |
while i > limit and ( | |
ord(str[i - 1]) < 128 and | |
is_ascii_id_char[ord(str[i - 1])] | |
): | |
i -= 1 | |
# If the above loop ended due to reaching a non-ASCII | |
# character, continue going backwards using the most generic | |
# test for whether a string contains only valid identifier | |
# characters. | |
if i > limit and ord(str[i - 1]) >= 128: | |
while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier(): | |
i -= 4 | |
if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier(): | |
i -= 2 | |
if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier(): | |
i -= 1 | |
# The identifier candidate starts here. If it isn't a valid | |
# identifier, don't eat anything. At this point that is only | |
# possible if the first character isn't a valid first | |
# character for an identifier. | |
if not str[i:pos].isidentifier(): | |
return 0 | |
elif i < pos: | |
# All characters in str[i:pos] are valid ASCII identifier | |
# characters, so it is enough to check that the first is | |
# valid as the first character of an identifier. | |
if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]: | |
return 0 | |
# All keywords are valid identifiers, but should not be | |
# considered identifiers here, except for True, False and None. | |
if i < pos and ( | |
iskeyword(str[i:pos]) and | |
str[i:pos] not in cls._ID_KEYWORDS | |
): | |
return 0 | |
return pos - i | |
# This string includes all chars that may be in a white space | |
_whitespace_chars = " \t\n\\" | |
def get_expression(self): | |
"""Return a string with the Python expression which ends at the | |
given index, which is empty if there is no real one. | |
""" | |
if not self.is_in_code(): | |
raise ValueError("get_expression should only be called " | |
"if index is inside a code.") | |
rawtext = self.rawtext | |
bracketing = self.bracketing | |
brck_index = self.indexbracket | |
brck_limit = bracketing[brck_index][0] | |
pos = self.indexinrawtext | |
last_identifier_pos = pos | |
postdot_phase = True | |
while True: | |
# Eat whitespaces, comments, and if postdot_phase is False - a dot | |
while True: | |
if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: | |
# Eat a whitespace | |
pos -= 1 | |
elif (not postdot_phase and | |
pos > brck_limit and rawtext[pos-1] == '.'): | |
# Eat a dot | |
pos -= 1 | |
postdot_phase = True | |
# The next line will fail if we are *inside* a comment, | |
# but we shouldn't be. | |
elif (pos == brck_limit and brck_index > 0 and | |
rawtext[bracketing[brck_index-1][0]] == '#'): | |
# Eat a comment | |
brck_index -= 2 | |
brck_limit = bracketing[brck_index][0] | |
pos = bracketing[brck_index+1][0] | |
else: | |
# If we didn't eat anything, quit. | |
break | |
if not postdot_phase: | |
# We didn't find a dot, so the expression end at the | |
# last identifier pos. | |
break | |
ret = self._eat_identifier(rawtext, brck_limit, pos) | |
if ret: | |
# There is an identifier to eat | |
pos = pos - ret | |
last_identifier_pos = pos | |
# Now, to continue the search, we must find a dot. | |
postdot_phase = False | |
# (the loop continues now) | |
elif pos == brck_limit: | |
# We are at a bracketing limit. If it is a closing | |
# bracket, eat the bracket, otherwise, stop the search. | |
level = bracketing[brck_index][1] | |
while brck_index > 0 and bracketing[brck_index-1][1] > level: | |
brck_index -= 1 | |
if bracketing[brck_index][0] == brck_limit: | |
# We were not at the end of a closing bracket | |
break | |
pos = bracketing[brck_index][0] | |
brck_index -= 1 | |
brck_limit = bracketing[brck_index][0] | |
last_identifier_pos = pos | |
if rawtext[pos] in "([": | |
# [] and () may be used after an identifier, so we | |
# continue. postdot_phase is True, so we don't allow a dot. | |
pass | |
else: | |
# We can't continue after other types of brackets | |
if rawtext[pos] in "'\"": | |
# Scan a string prefix | |
while pos > 0 and rawtext[pos - 1] in "rRbBuU": | |
pos -= 1 | |
last_identifier_pos = pos | |
break | |
else: | |
# We've found an operator or something. | |
break | |
return rawtext[last_identifier_pos:self.indexinrawtext] | |
if __name__ == '__main__': | |
from unittest import main | |
main('idlelib.idle_test.test_hyperparser', verbosity=2) | |