#
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#
import sys
from antlr4.IntervalSet import IntervalSet
from antlr4.Token import Token
from antlr4.atn.ATNState import ATNState
from antlr4.error.Errors import RecognitionException, NoViableAltException, InputMismatchException, \
    FailedPredicateException, ParseCancellationException

# need forward declaration
Parser = None


# Base interface for parser error handling. Every hook defined here is a
# no-op; concrete strategies override the hooks they need.
class ErrorStrategy(object):

    def reset(self, recognizer:Parser):
        pass

    def recoverInline(self, recognizer:Parser):
        pass

    def recover(self, recognizer:Parser, e:RecognitionException):
        pass

    def sync(self, recognizer:Parser):
        pass

    def inErrorRecoveryMode(self, recognizer:Parser):
        pass

    def reportError(self, recognizer:Parser, e:RecognitionException):
        pass


# This is the default implementation of {@link ANTLRErrorStrategy} used for
# error reporting and recovery in ANTLR parsers.
#
class DefaultErrorStrategy(ErrorStrategy):

    def __init__(self):
        super().__init__()
        # Indicates whether the error strategy is currently "recovering from an
        # error". This is used to suppress reporting multiple error messages while
        # attempting to recover from a detected syntax error.
        #
        # @see #inErrorRecoveryMode
        #
        self.errorRecoveryMode = False

        # The index into the input stream where the last error occurred.
        # This is used to prevent infinite loops where an error is found
        # but no token is consumed during recovery...another error is found,
        # ad nauseum. This is a failsafe mechanism to guarantee that at least
        # one token/tree node is consumed for two errors.
        #
        self.lastErrorIndex = -1
        # ATN states in which an error was seen at lastErrorIndex; None until
        # the first recovery (recover() creates the list and appends to it).
        self.lastErrorStates = None
        # Bookkeeping written by sync(); not read anywhere in the visible code.
        self.nextTokensContext = None
        self.nextTokenState = 0

#
# The default implementation simply calls {@link #endErrorCondition} to
# ensure that the handler is not in error recovery mode.
def reset(self, recognizer:Parser):
    # Resetting is the same as leaving error recovery mode; all state
    # clearing lives in endErrorCondition.
    self.endErrorCondition(recognizer)

#
# This method is called to enter error recovery mode when a recognition
# exception is reported.
#
# @param recognizer the parser instance
#
def beginErrorCondition(self, recognizer:Parser):
    self.errorRecoveryMode = True

# Returns the current value of the errorRecoveryMode flag.
#
# @param recognizer the parser instance (unused here)
#
def inErrorRecoveryMode(self, recognizer:Parser):
    return self.errorRecoveryMode

#
# This method is called to leave error recovery mode after recovering from
# a recognition exception.
#
# @param recognizer
#
def endErrorCondition(self, recognizer:Parser):
    self.errorRecoveryMode = False
    # forget where the last error happened so the failsafe in recover()
    # starts from a clean slate
    self.lastErrorStates = None
    self.lastErrorIndex = -1

#
# {@inheritDoc}
#
# The default implementation simply calls {@link #endErrorCondition}.
#
def reportMatch(self, recognizer:Parser):
    # A successful match ends whatever error condition was in progress.
    self.endErrorCondition(recognizer)

#
# {@inheritDoc}
#
# The default implementation returns immediately if the handler is already
# in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
# and dispatches the reporting task based on the runtime type of {@code e}
# according to the following table.
#
# The default implementation resynchronizes the parser by consuming tokens
# until we find one in the resynchronization set--loosely the set of tokens
# that can follow the current rule.
#
def recover(self, recognizer:Parser, e:RecognitionException):
    if self.lastErrorIndex==recognizer.getInputStream().index \
        and self.lastErrorStates is not None \
        and recognizer.state in self.lastErrorStates:
        # uh oh, another error at same token index and previously-visited
        # state in ATN; must be a case where LT(1) is in the recovery
        # token set so nothing got consumed. Consume a single token
        # at least to prevent an infinite loop; this is a failsafe.
        recognizer.consume()

    # remember where (index) and in which ATN state this error happened so
    # the check above can detect a repeat on the next call
    self.lastErrorIndex = recognizer._input.index
    if self.lastErrorStates is None:
        self.lastErrorStates = []
    self.lastErrorStates.append(recognizer.state)
    # skip input until a token of the recovery set is reached
    followSet = self.getErrorRecoverySet(recognizer)
    self.consumeUntil(recognizer, followSet)

# The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
# that the current lookahead symbol is consistent with what were expecting
# at this point in the ATN. You can call this anytime but ANTLR only
# generates code to check before subrules/loops and each iteration.
#
# Implements Jim Idle's magic sync mechanism in closures and optional
# subrules. E.g.,
#
# a : sync ( stuff sync )* ;
# sync : {consume to what can follow sync} ;
#
# At the start of a sub rule upon error, {@link #sync} performs single
# token deletion, if possible. If it can't do that, it bails on the current
# rule and uses the default error recovery, which consumes until the
# resynchronization set of the current rule.
#
# If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
# with an empty alternative), then the expected set includes what follows
# the subrule.
#
# During loop iteration, it consumes until it sees a token that can start a
# sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
# stay in the loop as long as possible.
#
# ORIGINS
#
# Previous versions of ANTLR did a poor job of their recovery within loops.
# A single mismatch token or missing token would force the parser to bail
# out of the entire rules surrounding the loop. So, for rule
#
# classDef : 'class' ID '{' member* '}'
#
# input with an extra token between members would force the parser to
# consume until it found the next class definition rather than the next
# member definition of the current class.
#
# This functionality cost a little bit of effort because the parser has to
# compare token set at the start of the loop and at each iteration. If for
# some reason speed is suffering for you, you can turn off this
# functionality by simply overriding this method as a blank { }.
#
# Checks that the current lookahead token is acceptable in the current ATN
# state and, when it is not, attempts recovery depending on the kind of state.
#
# @param recognizer the parser instance
# @raises InputMismatchException at the start of a (sub)block or star-loop
# entry when single-token deletion cannot recover
#
def sync(self, recognizer:Parser):
    # If already recovering, don't try to sync
    if self.inErrorRecoveryMode(recognizer):
        return

    s = recognizer._interp.atn.states[recognizer.state]
    la = recognizer.getTokenStream().LA(1)
    # try cheaper subset first; might get lucky. seems to shave a wee bit off
    nextTokens = recognizer.atn.nextTokens(s)
    if la in nextTokens:
        self.nextTokensContext = None
        self.nextTokenState = ATNState.INVALID_STATE_NUMBER
        return
    elif Token.EPSILON in nextTokens:
        if self.nextTokensContext is None:
            # It's possible the next token won't match; information tracked
            # by sync is restricted for performance.
            self.nextTokensContext = recognizer._ctx
            # same attribute that __init__ declares and the branch above
            # resets (previously misspelled as nextTokensState here)
            self.nextTokenState = recognizer._stateNumber
        return

    if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START,
                       ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]:
        # report error and recover if possible
        if self.singleTokenDeletion(recognizer) is not None:
            return
        else:
            raise InputMismatchException(recognizer)

    elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]:
        self.reportUnwantedToken(recognizer)
        expecting = recognizer.getExpectedTokens()
        whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer))
        self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule)

    else:
        # do nothing if we can't identify the exact kind of ATN state
        pass

# This is called by {@link #reportError} when the exception is a
# {@link NoViableAltException}.
#
# @see #reportError
#
# @param recognizer the parser instance
# @param e the recognition exception
#
# NOTE(review): the definition below is cut off in this source right after
# the opening quote of a string literal; the remainder of the method (and
# whatever followed it) is missing and has to be restored from the original
# file. The surviving fragment is kept verbatim, commented out:
#
# def reportNoViableAlternative(self, recognizer:Parser, e:NoViableAltException):
#     tokens = recognizer.getTokenStream()
#     if tokens is not None:
#         if e.startToken.type==Token.EOF:
#             input = "
#
# This method is called when {@link #singleTokenDeletion} identifies
# single-token deletion as a viable recovery strategy for a mismatched
# input error.
#
# The default implementation simply returns if the handler is already in
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
# enter error recovery mode, followed by calling
# {@link Parser#notifyErrorListeners}.
#
# @param recognizer the parser instance
#
def reportUnwantedToken(self, recognizer:Parser):
    # only the first error of a recovery episode is reported
    if self.inErrorRecoveryMode(recognizer):
        return

    self.beginErrorCondition(recognizer)
    t = recognizer.getCurrentToken()
    tokenName = self.getTokenErrorDisplay(t)
    expecting = self.getExpectedTokens(recognizer)
    msg = "extraneous input " + tokenName + " expecting " \
        + expecting.toString(recognizer.literalNames, recognizer.symbolicNames)
    recognizer.notifyErrorListeners(msg, t, None)

# This method is called to report a syntax error which requires the
# insertion of a missing token into the input stream. At the time this
# method is called, the missing token has not yet been inserted. When this
# method returns, {@code recognizer} is in error recovery mode.
#
# This method is called when {@link #singleTokenInsertion} identifies
# single-token insertion as a viable recovery strategy for a mismatched
# input error.
#
# The default implementation simply returns if the handler is already in
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
# enter error recovery mode, followed by calling
# {@link Parser#notifyErrorListeners}.
#
# @param recognizer the parser instance
#
def reportMissingToken(self, recognizer:Parser):
    # only the first error of a recovery episode is reported
    if self.inErrorRecoveryMode(recognizer):
        return
    self.beginErrorCondition(recognizer)
    t = recognizer.getCurrentToken()
    expecting = self.getExpectedTokens(recognizer)
    msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \
          + " at " + self.getTokenErrorDisplay(t)
    recognizer.notifyErrorListeners(msg, t, None)

# The default implementation attempts to recover from the mismatched input
# by using single token insertion and deletion as described below. If the
# recovery attempt fails, this method throws an
# {@link InputMismatchException}.
#
# EXTRA TOKEN (single token deletion)
#
# {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
# right token, however, then assume {@code LA(1)} is some extra spurious
# token and delete it. Then consume and return the next token (which was
# the {@code LA(2)} token) as the successful result of the match operation.
#
# This recovery strategy is implemented by {@link #singleTokenDeletion}.
#
# MISSING TOKEN (single token insertion)
#
# If current token (at {@code LA(1)}) is consistent with what could come
# after the expected {@code LA(1)} token, then assume the token is missing
# and use the parser's {@link TokenFactory} to create it on the fly. The
# "insertion" is performed by returning the created token as the successful
# result of the match operation.
#
# This recovery strategy is implemented by {@link #singleTokenInsertion}.
#
# EXAMPLE
#
# For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
# the parser returns from the nested call to {@code expr}, it will have
# call chain:
#
# stat → expr → atom
#
# and it will be trying to match the {@code ')'} at this point in the
# derivation:
#
#
# => ID '=' '(' INT ')' ('+' atom)* ';'
#                    ^
#
# The attempt to match {@code ')'} will fail when it sees {@code ';'} and
# call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
# is in the set of tokens that can follow the {@code ')'} token reference
# in rule {@code atom}. It can assume that you forgot the {@code ')'}.
#
def recoverInline(self, recognizer:Parser):
    # SINGLE TOKEN DELETION
    matchedSymbol = self.singleTokenDeletion(recognizer)
    if matchedSymbol is not None:
        # we have deleted the extra token.
        # now, move past ttype token as if all were ok
        recognizer.consume()
        return matchedSymbol

    # SINGLE TOKEN INSERTION
    if self.singleTokenInsertion(recognizer):
        return self.getMissingSymbol(recognizer)

    # even that didn't work; must throw the exception
    raise InputMismatchException(recognizer)

#
# This method implements the single-token insertion inline error recovery
# strategy. It is called by {@link #recoverInline} if the single-token
# deletion strategy fails to recover from the mismatched input. If this
# method returns {@code true}, {@code recognizer} will be in error recovery
# mode.
#
# This method determines whether or not single-token insertion is viable by
# checking if the {@code LA(1)} input symbol could be successfully matched
# if it were instead the {@code LA(2)} symbol. If this method returns
# {@code true}, the caller is responsible for creating and inserting a
# token with the correct type to produce this behavior.
#
# @param recognizer the parser instance
# @return {@code true} if single-token insertion is a viable recovery
# strategy for the current mismatched input, otherwise {@code false}
#
def singleTokenInsertion(self, recognizer:Parser):
    currentSymbolType = recognizer.getTokenStream().LA(1)
    # if current token is consistent with what could come after current
    # ATN state, then we know we're missing a token; error recovery
    # is free to conjure up and insert the missing token
    atn = recognizer._interp.atn
    currentState = atn.states[recognizer.state]
    # state reached through the first outgoing transition (local renamed so
    # it no longer shadows the builtin next())
    nextState = currentState.transitions[0].target
    expectingAtLL2 = atn.nextTokens(nextState, recognizer._ctx)
    if currentSymbolType in expectingAtLL2:
        self.reportMissingToken(recognizer)
        return True
    else:
        return False

# This method implements the single-token deletion inline error recovery
# strategy. It is called by {@link #recoverInline} to attempt to recover
# from mismatched input. If this method returns null, the parser and error
# handler state will not have changed. If this method returns non-null,
# {@code recognizer} will not be in error recovery mode since the
# returned token was a successful match.
#
# If the single-token deletion is successful, this method calls
# {@link #reportUnwantedToken} to report the error, followed by
# {@link Parser#consume} to actually "delete" the extraneous token. Then,
# before returning {@link #reportMatch} is called to signal a successful
# match.
#
# @param recognizer the parser instance
# @return the successfully matched {@link Token} instance if single-token
# deletion successfully recovers from the mismatched input, otherwise
# {@code null}
#
def singleTokenDeletion(self, recognizer:Parser):
    # deletion is viable when the token after the current one is expected
    nextTokenType = recognizer.getTokenStream().LA(2)
    expecting = self.getExpectedTokens(recognizer)
    if nextTokenType in expecting:
        self.reportUnwantedToken(recognizer)
        # print("recoverFromMismatchedToken deleting " \
        #     + str(recognizer.getTokenStream().LT(1)) \
        #     + " since " + str(recognizer.getTokenStream().LT(2)) \
        #     + " is what we want", file=sys.stderr)
        recognizer.consume() # simply delete extra token
        # we want to return the token we're actually matching
        matchedSymbol = recognizer.getCurrentToken()
        self.reportMatch(recognizer) # we know current token is correct
        return matchedSymbol
    else:
        return None

# Conjure up a missing token during error recovery.
#
# The recognizer attempts to recover from single missing
# symbols. But, actions might refer to that missing symbol.
# For example, x=ID {f($x);}. The action clearly assumes
# that there has been an identifier matched previously and that
# $x points at that token. If that token is missing, but
# the next token in the stream is what we want we assume that
# this token is missing and we keep going. Because we
# have to return some token to replace the missing token,
# we have to conjure one up. This method gives the user control
# over the tokens returned for missing tokens. Mostly,
# you will want to create something special for identifier
# tokens. For literals such as '{' and ',', the default
# action in the parser or tree parser works. It simply creates
# a CommonToken of the appropriate type. The text will be the token.
# If you change what tokens must be created by the lexer,
# override this method to create the appropriate tokens.
# def getMissingSymbol(self, recognizer:Parser): currentSymbol = recognizer.getCurrentToken() expecting = self.getExpectedTokens(recognizer) expectedTokenType = expecting[0] # get any element if expectedTokenType==Token.EOF: tokenText = "# This error strategy is useful in the following scenarios.
#
# {@code myparser.setErrorHandler(new BailErrorStrategy());}
#
# @see Parser#setErrorHandler(ANTLRErrorStrategy)
#
class BailErrorStrategy(DefaultErrorStrategy):
    # Instead of recovering from exception {@code e}, re-throw it wrapped
    # in a {@link ParseCancellationException} so it is not caught by the
    # rule function catches. Use {@link Exception#getCause()} to get the
    # original {@link RecognitionException}.
    #
    def recover(self, recognizer:Parser, e:RecognitionException):
        # record the exception on the current context and every ancestor
        context = recognizer._ctx
        while context is not None:
            context.exception = e
            context = context.parentCtx
        raise ParseCancellationException(e)

    # Make sure we don't attempt to recover inline; if the parser
    # successfully recovers, it won't throw an exception.
    #
    def recoverInline(self, recognizer:Parser):
        # recover() always raises, so this never returns a token
        self.recover(recognizer, InputMismatchException(recognizer))

    # Make sure we don't attempt to recover from problems in subrules.
    #
    def sync(self, recognizer:Parser):
        pass

del Parser