Spaces:
Runtime error
Runtime error
# | |
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. | |
# Use of this file is governed by the BSD 3-clause license that | |
# can be found in the LICENSE.txt file in the project root. | |
# | |
import sys | |
from antlr4.IntervalSet import IntervalSet | |
from antlr4.Token import Token | |
from antlr4.atn.ATNState import ATNState | |
from antlr4.error.Errors import RecognitionException, NoViableAltException, InputMismatchException, \ | |
FailedPredicateException, ParseCancellationException | |
# need forward declaration | |
Parser = None | |
class ErrorStrategy(object): | |
def reset(self, recognizer:Parser): | |
pass | |
def recoverInline(self, recognizer:Parser): | |
pass | |
def recover(self, recognizer:Parser, e:RecognitionException): | |
pass | |
def sync(self, recognizer:Parser): | |
pass | |
def inErrorRecoveryMode(self, recognizer:Parser): | |
pass | |
def reportError(self, recognizer:Parser, e:RecognitionException): | |
pass | |
# This is the default implementation of {@link ANTLRErrorStrategy} used for | |
# error reporting and recovery in ANTLR parsers. | |
# | |
class DefaultErrorStrategy(ErrorStrategy): | |
def __init__(self): | |
super().__init__() | |
# Indicates whether the error strategy is currently "recovering from an | |
# error". This is used to suppress reporting multiple error messages while | |
# attempting to recover from a detected syntax error. | |
# | |
# @see #inErrorRecoveryMode | |
# | |
self.errorRecoveryMode = False | |
# The index into the input stream where the last error occurred. | |
# This is used to prevent infinite loops where an error is found | |
# but no token is consumed during recovery...another error is found, | |
# ad nauseum. This is a failsafe mechanism to guarantee that at least | |
# one token/tree node is consumed for two errors. | |
# | |
self.lastErrorIndex = -1 | |
self.lastErrorStates = None | |
self.nextTokensContext = None | |
self.nextTokenState = 0 | |
# <p>The default implementation simply calls {@link #endErrorCondition} to | |
# ensure that the handler is not in error recovery mode.</p> | |
def reset(self, recognizer:Parser): | |
self.endErrorCondition(recognizer) | |
# | |
# This method is called to enter error recovery mode when a recognition | |
# exception is reported. | |
# | |
# @param recognizer the parser instance | |
# | |
def beginErrorCondition(self, recognizer:Parser): | |
self.errorRecoveryMode = True | |
def inErrorRecoveryMode(self, recognizer:Parser): | |
return self.errorRecoveryMode | |
# | |
# This method is called to leave error recovery mode after recovering from | |
# a recognition exception. | |
# | |
# @param recognizer | |
# | |
def endErrorCondition(self, recognizer:Parser): | |
self.errorRecoveryMode = False | |
self.lastErrorStates = None | |
self.lastErrorIndex = -1 | |
# | |
# {@inheritDoc} | |
# | |
# <p>The default implementation simply calls {@link #endErrorCondition}.</p> | |
# | |
def reportMatch(self, recognizer:Parser): | |
self.endErrorCondition(recognizer) | |
# | |
# {@inheritDoc} | |
# | |
# <p>The default implementation returns immediately if the handler is already | |
# in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} | |
# and dispatches the reporting task based on the runtime type of {@code e} | |
# according to the following table.</p> | |
# | |
# <ul> | |
# <li>{@link NoViableAltException}: Dispatches the call to | |
# {@link #reportNoViableAlternative}</li> | |
# <li>{@link InputMismatchException}: Dispatches the call to | |
# {@link #reportInputMismatch}</li> | |
# <li>{@link FailedPredicateException}: Dispatches the call to | |
# {@link #reportFailedPredicate}</li> | |
# <li>All other types: calls {@link Parser#notifyErrorListeners} to report | |
# the exception</li> | |
# </ul> | |
# | |
def reportError(self, recognizer:Parser, e:RecognitionException): | |
# if we've already reported an error and have not matched a token | |
# yet successfully, don't report any errors. | |
if self.inErrorRecoveryMode(recognizer): | |
return # don't report spurious errors | |
self.beginErrorCondition(recognizer) | |
if isinstance( e, NoViableAltException ): | |
self.reportNoViableAlternative(recognizer, e) | |
elif isinstance( e, InputMismatchException ): | |
self.reportInputMismatch(recognizer, e) | |
elif isinstance( e, FailedPredicateException ): | |
self.reportFailedPredicate(recognizer, e) | |
else: | |
print("unknown recognition error type: " + type(e).__name__) | |
recognizer.notifyErrorListeners(e.message, e.offendingToken, e) | |
# | |
# {@inheritDoc} | |
# | |
# <p>The default implementation resynchronizes the parser by consuming tokens | |
# until we find one in the resynchronization set--loosely the set of tokens | |
# that can follow the current rule.</p> | |
# | |
def recover(self, recognizer:Parser, e:RecognitionException): | |
if self.lastErrorIndex==recognizer.getInputStream().index \ | |
and self.lastErrorStates is not None \ | |
and recognizer.state in self.lastErrorStates: | |
# uh oh, another error at same token index and previously-visited | |
# state in ATN; must be a case where LT(1) is in the recovery | |
# token set so nothing got consumed. Consume a single token | |
# at least to prevent an infinite loop; this is a failsafe. | |
recognizer.consume() | |
self.lastErrorIndex = recognizer._input.index | |
if self.lastErrorStates is None: | |
self.lastErrorStates = [] | |
self.lastErrorStates.append(recognizer.state) | |
followSet = self.getErrorRecoverySet(recognizer) | |
self.consumeUntil(recognizer, followSet) | |
# The default implementation of {@link ANTLRErrorStrategy#sync} makes sure | |
# that the current lookahead symbol is consistent with what were expecting | |
# at this point in the ATN. You can call this anytime but ANTLR only | |
# generates code to check before subrules/loops and each iteration. | |
# | |
# <p>Implements Jim Idle's magic sync mechanism in closures and optional | |
# subrules. E.g.,</p> | |
# | |
# <pre> | |
# a : sync ( stuff sync )* ; | |
# sync : {consume to what can follow sync} ; | |
# </pre> | |
# | |
# At the start of a sub rule upon error, {@link #sync} performs single | |
# token deletion, if possible. If it can't do that, it bails on the current | |
# rule and uses the default error recovery, which consumes until the | |
# resynchronization set of the current rule. | |
# | |
# <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block | |
# with an empty alternative), then the expected set includes what follows | |
# the subrule.</p> | |
# | |
# <p>During loop iteration, it consumes until it sees a token that can start a | |
# sub rule or what follows loop. Yes, that is pretty aggressive. We opt to | |
# stay in the loop as long as possible.</p> | |
# | |
# <p><strong>ORIGINS</strong></p> | |
# | |
# <p>Previous versions of ANTLR did a poor job of their recovery within loops. | |
# A single mismatch token or missing token would force the parser to bail | |
# out of the entire rules surrounding the loop. So, for rule</p> | |
# | |
# <pre> | |
# classDef : 'class' ID '{' member* '}' | |
# </pre> | |
# | |
# input with an extra token between members would force the parser to | |
# consume until it found the next class definition rather than the next | |
# member definition of the current class. | |
# | |
# <p>This functionality cost a little bit of effort because the parser has to | |
# compare token set at the start of the loop and at each iteration. If for | |
# some reason speed is suffering for you, you can turn off this | |
# functionality by simply overriding this method as a blank { }.</p> | |
# | |
def sync(self, recognizer:Parser): | |
# If already recovering, don't try to sync | |
if self.inErrorRecoveryMode(recognizer): | |
return | |
s = recognizer._interp.atn.states[recognizer.state] | |
la = recognizer.getTokenStream().LA(1) | |
# try cheaper subset first; might get lucky. seems to shave a wee bit off | |
nextTokens = recognizer.atn.nextTokens(s) | |
if la in nextTokens: | |
self.nextTokensContext = None | |
self.nextTokenState = ATNState.INVALID_STATE_NUMBER | |
return | |
elif Token.EPSILON in nextTokens: | |
if self.nextTokensContext is None: | |
# It's possible the next token won't match information tracked | |
# by sync is restricted for performance. | |
self.nextTokensContext = recognizer._ctx | |
self.nextTokensState = recognizer._stateNumber | |
return | |
if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START, | |
ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]: | |
# report error and recover if possible | |
if self.singleTokenDeletion(recognizer)is not None: | |
return | |
else: | |
raise InputMismatchException(recognizer) | |
elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]: | |
self.reportUnwantedToken(recognizer) | |
expecting = recognizer.getExpectedTokens() | |
whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer)) | |
self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule) | |
else: | |
# do nothing if we can't identify the exact kind of ATN state | |
pass | |
# This is called by {@link #reportError} when the exception is a | |
# {@link NoViableAltException}. | |
# | |
# @see #reportError | |
# | |
# @param recognizer the parser instance | |
# @param e the recognition exception | |
# | |
def reportNoViableAlternative(self, recognizer:Parser, e:NoViableAltException): | |
tokens = recognizer.getTokenStream() | |
if tokens is not None: | |
if e.startToken.type==Token.EOF: | |
input = "<EOF>" | |
else: | |
input = tokens.getText(e.startToken, e.offendingToken) | |
else: | |
input = "<unknown input>" | |
msg = "no viable alternative at input " + self.escapeWSAndQuote(input) | |
recognizer.notifyErrorListeners(msg, e.offendingToken, e) | |
# | |
# This is called by {@link #reportError} when the exception is an | |
# {@link InputMismatchException}. | |
# | |
# @see #reportError | |
# | |
# @param recognizer the parser instance | |
# @param e the recognition exception | |
# | |
def reportInputMismatch(self, recognizer:Parser, e:InputMismatchException): | |
msg = "mismatched input " + self.getTokenErrorDisplay(e.offendingToken) \ | |
+ " expecting " + e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames) | |
recognizer.notifyErrorListeners(msg, e.offendingToken, e) | |
# | |
# This is called by {@link #reportError} when the exception is a | |
# {@link FailedPredicateException}. | |
# | |
# @see #reportError | |
# | |
# @param recognizer the parser instance | |
# @param e the recognition exception | |
# | |
def reportFailedPredicate(self, recognizer, e): | |
ruleName = recognizer.ruleNames[recognizer._ctx.getRuleIndex()] | |
msg = "rule " + ruleName + " " + e.message | |
recognizer.notifyErrorListeners(msg, e.offendingToken, e) | |
# This method is called to report a syntax error which requires the removal | |
# of a token from the input stream. At the time this method is called, the | |
# erroneous symbol is current {@code LT(1)} symbol and has not yet been | |
# removed from the input stream. When this method returns, | |
# {@code recognizer} is in error recovery mode. | |
# | |
# <p>This method is called when {@link #singleTokenDeletion} identifies | |
# single-token deletion as a viable recovery strategy for a mismatched | |
# input error.</p> | |
# | |
# <p>The default implementation simply returns if the handler is already in | |
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to | |
# enter error recovery mode, followed by calling | |
# {@link Parser#notifyErrorListeners}.</p> | |
# | |
# @param recognizer the parser instance | |
# | |
def reportUnwantedToken(self, recognizer:Parser): | |
if self.inErrorRecoveryMode(recognizer): | |
return | |
self.beginErrorCondition(recognizer) | |
t = recognizer.getCurrentToken() | |
tokenName = self.getTokenErrorDisplay(t) | |
expecting = self.getExpectedTokens(recognizer) | |
msg = "extraneous input " + tokenName + " expecting " \ | |
+ expecting.toString(recognizer.literalNames, recognizer.symbolicNames) | |
recognizer.notifyErrorListeners(msg, t, None) | |
# This method is called to report a syntax error which requires the | |
# insertion of a missing token into the input stream. At the time this | |
# method is called, the missing token has not yet been inserted. When this | |
# method returns, {@code recognizer} is in error recovery mode. | |
# | |
# <p>This method is called when {@link #singleTokenInsertion} identifies | |
# single-token insertion as a viable recovery strategy for a mismatched | |
# input error.</p> | |
# | |
# <p>The default implementation simply returns if the handler is already in | |
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to | |
# enter error recovery mode, followed by calling | |
# {@link Parser#notifyErrorListeners}.</p> | |
# | |
# @param recognizer the parser instance | |
# | |
def reportMissingToken(self, recognizer:Parser): | |
if self.inErrorRecoveryMode(recognizer): | |
return | |
self.beginErrorCondition(recognizer) | |
t = recognizer.getCurrentToken() | |
expecting = self.getExpectedTokens(recognizer) | |
msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \ | |
+ " at " + self.getTokenErrorDisplay(t) | |
recognizer.notifyErrorListeners(msg, t, None) | |
# <p>The default implementation attempts to recover from the mismatched input | |
# by using single token insertion and deletion as described below. If the | |
# recovery attempt fails, this method throws an | |
# {@link InputMismatchException}.</p> | |
# | |
# <p><strong>EXTRA TOKEN</strong> (single token deletion)</p> | |
# | |
# <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the | |
# right token, however, then assume {@code LA(1)} is some extra spurious | |
# token and delete it. Then consume and return the next token (which was | |
# the {@code LA(2)} token) as the successful result of the match operation.</p> | |
# | |
# <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p> | |
# | |
# <p><strong>MISSING TOKEN</strong> (single token insertion)</p> | |
# | |
# <p>If current token (at {@code LA(1)}) is consistent with what could come | |
# after the expected {@code LA(1)} token, then assume the token is missing | |
# and use the parser's {@link TokenFactory} to create it on the fly. The | |
# "insertion" is performed by returning the created token as the successful | |
# result of the match operation.</p> | |
# | |
# <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p> | |
# | |
# <p><strong>EXAMPLE</strong></p> | |
# | |
# <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When | |
# the parser returns from the nested call to {@code expr}, it will have | |
# call chain:</p> | |
# | |
# <pre> | |
# stat → expr → atom | |
# </pre> | |
# | |
# and it will be trying to match the {@code ')'} at this point in the | |
# derivation: | |
# | |
# <pre> | |
# => ID '=' '(' INT ')' ('+' atom)* ';' | |
# ^ | |
# </pre> | |
# | |
# The attempt to match {@code ')'} will fail when it sees {@code ';'} and | |
# call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} | |
# is in the set of tokens that can follow the {@code ')'} token reference | |
# in rule {@code atom}. It can assume that you forgot the {@code ')'}. | |
# | |
def recoverInline(self, recognizer:Parser): | |
# SINGLE TOKEN DELETION | |
matchedSymbol = self.singleTokenDeletion(recognizer) | |
if matchedSymbol is not None: | |
# we have deleted the extra token. | |
# now, move past ttype token as if all were ok | |
recognizer.consume() | |
return matchedSymbol | |
# SINGLE TOKEN INSERTION | |
if self.singleTokenInsertion(recognizer): | |
return self.getMissingSymbol(recognizer) | |
# even that didn't work; must throw the exception | |
raise InputMismatchException(recognizer) | |
# | |
# This method implements the single-token insertion inline error recovery | |
# strategy. It is called by {@link #recoverInline} if the single-token | |
# deletion strategy fails to recover from the mismatched input. If this | |
# method returns {@code true}, {@code recognizer} will be in error recovery | |
# mode. | |
# | |
# <p>This method determines whether or not single-token insertion is viable by | |
# checking if the {@code LA(1)} input symbol could be successfully matched | |
# if it were instead the {@code LA(2)} symbol. If this method returns | |
# {@code true}, the caller is responsible for creating and inserting a | |
# token with the correct type to produce this behavior.</p> | |
# | |
# @param recognizer the parser instance | |
# @return {@code true} if single-token insertion is a viable recovery | |
# strategy for the current mismatched input, otherwise {@code false} | |
# | |
def singleTokenInsertion(self, recognizer:Parser): | |
currentSymbolType = recognizer.getTokenStream().LA(1) | |
# if current token is consistent with what could come after current | |
# ATN state, then we know we're missing a token; error recovery | |
# is free to conjure up and insert the missing token | |
atn = recognizer._interp.atn | |
currentState = atn.states[recognizer.state] | |
next = currentState.transitions[0].target | |
expectingAtLL2 = atn.nextTokens(next, recognizer._ctx) | |
if currentSymbolType in expectingAtLL2: | |
self.reportMissingToken(recognizer) | |
return True | |
else: | |
return False | |
# This method implements the single-token deletion inline error recovery | |
# strategy. It is called by {@link #recoverInline} to attempt to recover | |
# from mismatched input. If this method returns null, the parser and error | |
# handler state will not have changed. If this method returns non-null, | |
# {@code recognizer} will <em>not</em> be in error recovery mode since the | |
# returned token was a successful match. | |
# | |
# <p>If the single-token deletion is successful, this method calls | |
# {@link #reportUnwantedToken} to report the error, followed by | |
# {@link Parser#consume} to actually "delete" the extraneous token. Then, | |
# before returning {@link #reportMatch} is called to signal a successful | |
# match.</p> | |
# | |
# @param recognizer the parser instance | |
# @return the successfully matched {@link Token} instance if single-token | |
# deletion successfully recovers from the mismatched input, otherwise | |
# {@code null} | |
# | |
def singleTokenDeletion(self, recognizer:Parser): | |
nextTokenType = recognizer.getTokenStream().LA(2) | |
expecting = self.getExpectedTokens(recognizer) | |
if nextTokenType in expecting: | |
self.reportUnwantedToken(recognizer) | |
# print("recoverFromMismatchedToken deleting " \ | |
# + str(recognizer.getTokenStream().LT(1)) \ | |
# + " since " + str(recognizer.getTokenStream().LT(2)) \ | |
# + " is what we want", file=sys.stderr) | |
recognizer.consume() # simply delete extra token | |
# we want to return the token we're actually matching | |
matchedSymbol = recognizer.getCurrentToken() | |
self.reportMatch(recognizer) # we know current token is correct | |
return matchedSymbol | |
else: | |
return None | |
# Conjure up a missing token during error recovery. | |
# | |
# The recognizer attempts to recover from single missing | |
# symbols. But, actions might refer to that missing symbol. | |
# For example, x=ID {f($x);}. The action clearly assumes | |
# that there has been an identifier matched previously and that | |
# $x points at that token. If that token is missing, but | |
# the next token in the stream is what we want we assume that | |
# this token is missing and we keep going. Because we | |
# have to return some token to replace the missing token, | |
# we have to conjure one up. This method gives the user control | |
# over the tokens returned for missing tokens. Mostly, | |
# you will want to create something special for identifier | |
# tokens. For literals such as '{' and ',', the default | |
# action in the parser or tree parser works. It simply creates | |
# a CommonToken of the appropriate type. The text will be the token. | |
# If you change what tokens must be created by the lexer, | |
# override this method to create the appropriate tokens. | |
# | |
def getMissingSymbol(self, recognizer:Parser): | |
currentSymbol = recognizer.getCurrentToken() | |
expecting = self.getExpectedTokens(recognizer) | |
expectedTokenType = expecting[0] # get any element | |
if expectedTokenType==Token.EOF: | |
tokenText = "<missing EOF>" | |
else: | |
name = None | |
if expectedTokenType < len(recognizer.literalNames): | |
name = recognizer.literalNames[expectedTokenType] | |
if name is None and expectedTokenType < len(recognizer.symbolicNames): | |
name = recognizer.symbolicNames[expectedTokenType] | |
tokenText = "<missing " + str(name) + ">" | |
current = currentSymbol | |
lookback = recognizer.getTokenStream().LT(-1) | |
if current.type==Token.EOF and lookback is not None: | |
current = lookback | |
return recognizer.getTokenFactory().create(current.source, | |
expectedTokenType, tokenText, Token.DEFAULT_CHANNEL, | |
-1, -1, current.line, current.column) | |
def getExpectedTokens(self, recognizer:Parser): | |
return recognizer.getExpectedTokens() | |
# How should a token be displayed in an error message? The default | |
# is to display just the text, but during development you might | |
# want to have a lot of information spit out. Override in that case | |
# to use t.toString() (which, for CommonToken, dumps everything about | |
# the token). This is better than forcing you to override a method in | |
# your token objects because you don't have to go modify your lexer | |
# so that it creates a new Java type. | |
# | |
def getTokenErrorDisplay(self, t:Token): | |
if t is None: | |
return "<no token>" | |
s = t.text | |
if s is None: | |
if t.type==Token.EOF: | |
s = "<EOF>" | |
else: | |
s = "<" + str(t.type) + ">" | |
return self.escapeWSAndQuote(s) | |
def escapeWSAndQuote(self, s:str): | |
s = s.replace("\n","\\n") | |
s = s.replace("\r","\\r") | |
s = s.replace("\t","\\t") | |
return "'" + s + "'" | |
# Compute the error recovery set for the current rule. During | |
# rule invocation, the parser pushes the set of tokens that can | |
# follow that rule reference on the stack; this amounts to | |
# computing FIRST of what follows the rule reference in the | |
# enclosing rule. See LinearApproximator.FIRST(). | |
# This local follow set only includes tokens | |
# from within the rule; i.e., the FIRST computation done by | |
# ANTLR stops at the end of a rule. | |
# | |
# EXAMPLE | |
# | |
# When you find a "no viable alt exception", the input is not | |
# consistent with any of the alternatives for rule r. The best | |
# thing to do is to consume tokens until you see something that | |
# can legally follow a call to r#or* any rule that called r. | |
# You don't want the exact set of viable next tokens because the | |
# input might just be missing a token--you might consume the | |
# rest of the input looking for one of the missing tokens. | |
# | |
# Consider grammar: | |
# | |
# a : '[' b ']' | |
# | '(' b ')' | |
# ; | |
# b : c '^' INT ; | |
# c : ID | |
# | INT | |
# ; | |
# | |
# At each rule invocation, the set of tokens that could follow | |
# that rule is pushed on a stack. Here are the various | |
# context-sensitive follow sets: | |
# | |
# FOLLOW(b1_in_a) = FIRST(']') = ']' | |
# FOLLOW(b2_in_a) = FIRST(')') = ')' | |
# FOLLOW(c_in_b) = FIRST('^') = '^' | |
# | |
# Upon erroneous input "[]", the call chain is | |
# | |
# a -> b -> c | |
# | |
# and, hence, the follow context stack is: | |
# | |
# depth follow set start of rule execution | |
# 0 <EOF> a (from main()) | |
# 1 ']' b | |
# 2 '^' c | |
# | |
# Notice that ')' is not included, because b would have to have | |
# been called from a different context in rule a for ')' to be | |
# included. | |
# | |
# For error recovery, we cannot consider FOLLOW(c) | |
# (context-sensitive or otherwise). We need the combined set of | |
# all context-sensitive FOLLOW sets--the set of all tokens that | |
# could follow any reference in the call chain. We need to | |
# resync to one of those tokens. Note that FOLLOW(c)='^' and if | |
# we resync'd to that token, we'd consume until EOF. We need to | |
# sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. | |
# In this case, for input "[]", LA(1) is ']' and in the set, so we would | |
# not consume anything. After printing an error, rule c would | |
# return normally. Rule b would not find the required '^' though. | |
# At this point, it gets a mismatched token error and throws an | |
# exception (since LA(1) is not in the viable following token | |
# set). The rule exception handler tries to recover, but finds | |
# the same recovery set and doesn't consume anything. Rule b | |
# exits normally returning to rule a. Now it finds the ']' (and | |
# with the successful match exits errorRecovery mode). | |
# | |
# So, you can see that the parser walks up the call chain looking | |
# for the token that was a member of the recovery set. | |
# | |
# Errors are not generated in errorRecovery mode. | |
# | |
# ANTLR's error recovery mechanism is based upon original ideas: | |
# | |
# "Algorithms + Data Structures = Programs" by Niklaus Wirth | |
# | |
# and | |
# | |
# "A note on error recovery in recursive descent parsers": | |
# http:#portal.acm.org/citation.cfm?id=947902.947905 | |
# | |
# Later, Josef Grosch had some good ideas: | |
# | |
# "Efficient and Comfortable Error Recovery in Recursive Descent | |
# Parsers": | |
# ftp:#www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip | |
# | |
# Like Grosch I implement context-sensitive FOLLOW sets that are combined | |
# at run-time upon error to avoid overhead during parsing. | |
# | |
def getErrorRecoverySet(self, recognizer:Parser): | |
atn = recognizer._interp.atn | |
ctx = recognizer._ctx | |
recoverSet = IntervalSet() | |
while ctx is not None and ctx.invokingState>=0: | |
# compute what follows who invoked us | |
invokingState = atn.states[ctx.invokingState] | |
rt = invokingState.transitions[0] | |
follow = atn.nextTokens(rt.followState) | |
recoverSet.addSet(follow) | |
ctx = ctx.parentCtx | |
recoverSet.removeOne(Token.EPSILON) | |
return recoverSet | |
# Consume tokens until one matches the given token set.# | |
def consumeUntil(self, recognizer:Parser, set_:set): | |
ttype = recognizer.getTokenStream().LA(1) | |
while ttype != Token.EOF and not ttype in set_: | |
recognizer.consume() | |
ttype = recognizer.getTokenStream().LA(1) | |
# | |
# This implementation of {@link ANTLRErrorStrategy} responds to syntax errors | |
# by immediately canceling the parse operation with a | |
# {@link ParseCancellationException}. The implementation ensures that the | |
# {@link ParserRuleContext#exception} field is set for all parse tree nodes | |
# that were not completed prior to encountering the error. | |
# | |
# <p> | |
# This error strategy is useful in the following scenarios.</p> | |
# | |
# <ul> | |
# <li><strong>Two-stage parsing:</strong> This error strategy allows the first | |
# stage of two-stage parsing to immediately terminate if an error is | |
# encountered, and immediately fall back to the second stage. In addition to | |
# avoiding wasted work by attempting to recover from errors here, the empty | |
# implementation of {@link BailErrorStrategy#sync} improves the performance of | |
# the first stage.</li> | |
# <li><strong>Silent validation:</strong> When syntax errors are not being | |
# reported or logged, and the parse result is simply ignored if errors occur, | |
# the {@link BailErrorStrategy} avoids wasting work on recovering from errors | |
# when the result will be ignored either way.</li> | |
# </ul> | |
# | |
# <p> | |
# {@code myparser.setErrorHandler(new BailErrorStrategy());}</p> | |
# | |
# @see Parser#setErrorHandler(ANTLRErrorStrategy) | |
# | |
class BailErrorStrategy(DefaultErrorStrategy): | |
# Instead of recovering from exception {@code e}, re-throw it wrapped | |
# in a {@link ParseCancellationException} so it is not caught by the | |
# rule function catches. Use {@link Exception#getCause()} to get the | |
# original {@link RecognitionException}. | |
# | |
def recover(self, recognizer:Parser, e:RecognitionException): | |
context = recognizer._ctx | |
while context is not None: | |
context.exception = e | |
context = context.parentCtx | |
raise ParseCancellationException(e) | |
# Make sure we don't attempt to recover inline; if the parser | |
# successfully recovers, it won't throw an exception. | |
# | |
def recoverInline(self, recognizer:Parser): | |
self.recover(recognizer, InputMismatchException(recognizer)) | |
# Make sure we don't attempt to recover from problems in subrules.# | |
def sync(self, recognizer:Parser): | |
pass | |
del Parser |