Spaces:
Runtime error
Runtime error
File size: 25,465 Bytes
8a6cf24 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 |
#
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#/
# When we hit an accept state in either the DFA or the ATN, we
# have to notify the character stream to start buffering characters
# via {@link IntStream#mark} and record the current state. The current sim state
# includes the current index into the input, the current line,
# and current character position in that line. Note that the Lexer is
# tracking the starting line and character position of the token. These
# variables track the "state" of the simulator when it hits an accept state.
#
# <p>We track these variables separately for the DFA and ATN simulation
# because the DFA simulation often has to fail over to the ATN
# simulation. If the ATN simulation fails, we need the DFA to fall
# back to its previously accepted state, if any. If the ATN succeeds,
# then the ATN does the accept and the DFA simulator that invoked it
# can simply return the predicted token type.</p>
#/
from antlr4.PredictionContext import PredictionContextCache, SingletonPredictionContext, PredictionContext
from antlr4.InputStream import InputStream
from antlr4.Token import Token
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNConfig import LexerATNConfig
from antlr4.atn.ATNSimulator import ATNSimulator
from antlr4.atn.ATNConfigSet import ATNConfigSet, OrderedATNConfigSet
from antlr4.atn.ATNState import RuleStopState, ATNState
from antlr4.atn.LexerActionExecutor import LexerActionExecutor
from antlr4.atn.Transition import Transition
from antlr4.dfa.DFAState import DFAState
from antlr4.error.Errors import LexerNoViableAltException, UnsupportedOperationException
class SimState(object):
    """Snapshot of the simulator's input position (index, line, column) and
    the DFA state at the most recent accept; reset before each token match."""
    __slots__ = ('index', 'line', 'column', 'dfaState')

    def __init__(self):
        self.reset()

    def reset(self):
        # Sentinel values (index/column -1, line 0, no DFA state) mean
        # "no accept state has been captured yet".
        self.index, self.line, self.column, self.dfaState = -1, 0, -1, None
# need forward declaration
# Dummy placeholders so the type annotations below (recog:Lexer,
# simulator:LexerATNSimulator) resolve at class-creation time; Lexer is
# imported for real inside __init__ and deleted again at module end.
Lexer = None
LexerATNSimulator = None
class LexerATNSimulator(ATNSimulator):
    """ATN-based lexer interpreter: predicts the next token type by simulating
    the lexer ATN, while building per-mode DFAs so that previously seen input
    sequences can be matched by cheap DFA-edge lookups instead."""
    __slots__ = (
        'decisionToDFA', 'recog', 'startIndex', 'line', 'column', 'mode',
        'DEFAULT_MODE', 'MAX_CHAR_VALUE', 'prevAccept'
    )

    debug = False
    dfa_debug = False

    # Only input symbols in [MIN_DFA_EDGE, MAX_DFA_EDGE] get cached DFA edges;
    # anything outside this range is always simulated in the ATN.
    MIN_DFA_EDGE = 0
    MAX_DFA_EDGE = 127 # forces unicode to stay in ATN

    # Sentinel DFA state meaning "no viable transition"; assigned after the
    # class body (see module tail).
    ERROR = None

    def __init__(self, recog:Lexer, atn:ATN, decisionToDFA:list, sharedContextCache:PredictionContextCache):
        super().__init__(atn, sharedContextCache)
        self.decisionToDFA = decisionToDFA
        self.recog = recog
        # The current token's starting index into the character stream.
        # Shared across DFA to ATN simulation in case the ATN fails and the
        # DFA did not have a previous accept state. In this case, we use the
        # ATN-generated exception object.
        self.startIndex = -1
        # line number 1..n within the input#/
        self.line = 1
        # The index of the character relative to the beginning of the line 0..n-1#/
        self.column = 0
        # Imported here (not at module top) to avoid a circular import with antlr4.Lexer.
        from antlr4.Lexer import Lexer
        self.mode = Lexer.DEFAULT_MODE
        # Cache Lexer properties to avoid further imports
        self.DEFAULT_MODE = Lexer.DEFAULT_MODE
        self.MAX_CHAR_VALUE = Lexer.MAX_CHAR_VALUE
        # Used during DFA/ATN exec to record the most recent accept configuration info
        self.prevAccept = SimState()

    def copyState(self, simulator:LexerATNSimulator):
        """Copy another simulator's position/mode state into this one."""
        self.column = simulator.column
        self.line = simulator.line
        self.mode = simulator.mode
        self.startIndex = simulator.startIndex

    def match(self, input:InputStream, mode:int):
        """Match a token from *input* in the given *mode* and return the
        predicted token type; falls back to full ATN simulation when the
        mode's DFA has no start state yet."""
        self.mode = mode
        # mark/release keeps the stream buffered for the duration of the match.
        mark = input.mark()
        try:
            self.startIndex = input.index
            self.prevAccept.reset()
            dfa = self.decisionToDFA[mode]
            if dfa.s0 is None:
                return self.matchATN(input)
            else:
                return self.execATN(input, dfa.s0)
        finally:
            input.release(mark)

    def reset(self):
        """Return the simulator to its initial (pre-match) state."""
        self.prevAccept.reset()
        self.startIndex = -1
        self.line = 1
        self.column = 0
        self.mode = self.DEFAULT_MODE

    def matchATN(self, input:InputStream):
        """Compute the DFA start state for the current mode from the ATN,
        install it (unless predicate-dependent), then run execATN."""
        startState = self.atn.modeToStartState[self.mode]

        if LexerATNSimulator.debug:
            print("matchATN mode " + str(self.mode) + " start: " + str(startState))

        old_mode = self.mode
        s0_closure = self.computeStartState(input, startState)
        # A predicate in the start closure makes s0 input-dependent, so it
        # must not be cached as the mode's permanent start state.
        suppressEdge = s0_closure.hasSemanticContext
        s0_closure.hasSemanticContext = False

        next = self.addDFAState(s0_closure)
        if not suppressEdge:
            self.decisionToDFA[self.mode].s0 = next

        predict = self.execATN(input, next)

        if LexerATNSimulator.debug:
            print("DFA after matchATN: " + str(self.decisionToDFA[old_mode].toLexerString()))

        return predict

    def execATN(self, input:InputStream, ds0:DFAState):
        """Main simulation loop: walk DFA edges (computing new ones from the
        ATN on demand) until no transition matches, then fail or accept."""
        if LexerATNSimulator.debug:
            print("start state closure=" + str(ds0.configs))

        if ds0.isAcceptState:
            # allow zero-length tokens
            self.captureSimState(self.prevAccept, input, ds0)

        t = input.LA(1)
        s = ds0 # s is current/from DFA state

        while True: # while more work
            if LexerATNSimulator.debug:
                print("execATN loop starting closure:", str(s.configs))

            # As we move src->trg, src->trg, we keep track of the previous trg to
            # avoid looking up the DFA state again, which is expensive.
            # If the previous target was already part of the DFA, we might
            # be able to avoid doing a reach operation upon t. If s!=null,
            # it means that semantic predicates didn't prevent us from
            # creating a DFA state. Once we know s!=null, we check to see if
            # the DFA state has an edge already for t. If so, we can just reuse
            # it's configuration set; there's no point in re-computing it.
            # This is kind of like doing DFA simulation within the ATN
            # simulation because DFA simulation is really just a way to avoid
            # computing reach/closure sets. Technically, once we know that
            # we have a previously added DFA state, we could jump over to
            # the DFA simulator. But, that would mean popping back and forth
            # a lot and making things more complicated algorithmically.
            # This optimization makes a lot of sense for loops within DFA.
            # A character will take us back to an existing DFA state
            # that already has lots of edges out of it. e.g., .* in comments.
            # print("Target for:" + str(s) + " and:" + str(t))
            target = self.getExistingTargetState(s, t)
            # print("Existing:" + str(target))
            if target is None:
                target = self.computeTargetState(input, s, t)
                # print("Computed:" + str(target))

            if target == self.ERROR:
                break

            # If this is a consumable input element, make sure to consume before
            # capturing the accept state so the input index, line, and char
            # position accurately reflect the state of the interpreter at the
            # end of the token.
            if t != Token.EOF:
                self.consume(input)

            if target.isAcceptState:
                self.captureSimState(self.prevAccept, input, target)
                if t == Token.EOF:
                    break

            t = input.LA(1)
            s = target # flip; current DFA target becomes new src/from state

        return self.failOrAccept(self.prevAccept, input, s.configs, t)

    # Get an existing target state for an edge in the DFA. If the target state
    # for the edge has not yet been computed or is otherwise not available,
    # this method returns {@code null}.
    #
    # @param s The current DFA state
    # @param t The next input symbol
    # @return The existing target DFA state for the given input symbol
    # {@code t}, or {@code null} if the target state for this edge is not
    # already cached
    def getExistingTargetState(self, s:DFAState, t:int):
        if s.edges is None or t < self.MIN_DFA_EDGE or t > self.MAX_DFA_EDGE:
            return None

        target = s.edges[t - self.MIN_DFA_EDGE]
        if LexerATNSimulator.debug and target is not None:
            print("reuse state", str(s.stateNumber), "edge to", str(target.stateNumber))

        return target

    # Compute a target state for an edge in the DFA, and attempt to add the
    # computed state and corresponding edge to the DFA.
    #
    # @param input The input stream
    # @param s The current DFA state
    # @param t The next input symbol
    #
    # @return The computed target DFA state for the given input symbol
    # {@code t}. If {@code t} does not lead to a valid DFA state, this method
    # returns {@link #ERROR}.
    def computeTargetState(self, input:InputStream, s:DFAState, t:int):
        reach = OrderedATNConfigSet()

        # if we don't find an existing DFA state
        # Fill reach starting from closure, following t transitions
        self.getReachableConfigSet(input, s.configs, reach, t)

        if len(reach)==0: # we got nowhere on t from s
            if not reach.hasSemanticContext:
                # we got nowhere on t, don't throw out this knowledge; it'd
                # cause a failover from DFA later.
                self.addDFAEdge(s, t, self.ERROR)

            # stop when we can't match any more char
            return self.ERROR

        # Add an edge from s to target DFA found/created for reach
        return self.addDFAEdge(s, t, cfgs=reach)

    def failOrAccept(self, prevAccept:SimState, input:InputStream, reach:ATNConfigSet, t:int):
        """Roll back to the last captured accept state and run its actions, or
        raise LexerNoViableAltException if no accept was ever reached."""
        if self.prevAccept.dfaState is not None:
            lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor
            self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
            return prevAccept.dfaState.prediction
        else:
            # if no accept and EOF is first char, return EOF
            if t==Token.EOF and input.index==self.startIndex:
                return Token.EOF
            raise LexerNoViableAltException(self.recog, input, self.startIndex, reach)

    # Given a starting configuration set, figure out all ATN configurations
    # we can reach upon input {@code t}. Parameter {@code reach} is a return
    # parameter.
    def getReachableConfigSet(self, input:InputStream, closure:ATNConfigSet, reach:ATNConfigSet, t:int):
        # this is used to skip processing for configs which have a lower priority
        # than a config that already reached an accept state for the same rule
        skipAlt = ATN.INVALID_ALT_NUMBER
        for cfg in closure:
            currentAltReachedAcceptState = ( cfg.alt == skipAlt )
            if currentAltReachedAcceptState and cfg.passedThroughNonGreedyDecision:
                continue

            if LexerATNSimulator.debug:
                print("testing", self.getTokenName(t), "at", str(cfg))

            for trans in cfg.state.transitions: # for each transition
                target = self.getReachableTarget(trans, t)
                if target is not None:
                    lexerActionExecutor = cfg.lexerActionExecutor
                    if lexerActionExecutor is not None:
                        # Offsets inside the executor are relative to the token
                        # start; fix them up for position-dependent actions.
                        lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex)

                    treatEofAsEpsilon = (t == Token.EOF)
                    config = LexerATNConfig(state=target, lexerActionExecutor=lexerActionExecutor, config=cfg)
                    if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon):
                        # any remaining configs for this alt have a lower priority than
                        # the one that just reached an accept state.
                        skipAlt = cfg.alt

    def accept(self, input:InputStream, lexerActionExecutor:LexerActionExecutor, startIndex:int, index:int, line:int, charPos:int):
        """Position the stream just past the accepted token and execute any
        pending lexer actions."""
        if LexerATNSimulator.debug:
            print("ACTION", lexerActionExecutor)

        # seek to after last char in token
        input.seek(index)
        self.line = line
        self.column = charPos

        if lexerActionExecutor is not None and self.recog is not None:
            lexerActionExecutor.execute(self.recog, input, startIndex)

    def getReachableTarget(self, trans:Transition, t:int):
        """Return the transition's target state if it matches symbol *t*, else None."""
        if trans.matches(t, 0, self.MAX_CHAR_VALUE):
            return trans.target
        else:
            return None

    def computeStartState(self, input:InputStream, p:ATNState):
        """Build the closure of the mode start state *p*; alt numbers are the
        1-based transition indexes."""
        initialContext = PredictionContext.EMPTY
        configs = OrderedATNConfigSet()
        for i in range(0,len(p.transitions)):
            target = p.transitions[i].target
            c = LexerATNConfig(state=target, alt=i+1, context=initialContext)
            self.closure(input, c, configs, False, False, False)
        return configs

    # Since the alternatives within any lexer decision are ordered by
    # preference, this method stops pursuing the closure as soon as an accept
    # state is reached. After the first accept state is reached by depth-first
    # search from {@code config}, all other (potentially reachable) states for
    # this rule would have a lower priority.
    #
    # @return {@code true} if an accept state is reached, otherwise
    # {@code false}.
    def closure(self, input:InputStream, config:LexerATNConfig, configs:ATNConfigSet, currentAltReachedAcceptState:bool,
                speculative:bool, treatEofAsEpsilon:bool):
        if LexerATNSimulator.debug:
            print("closure(" + str(config) + ")")

        if isinstance( config.state, RuleStopState ):
            if LexerATNSimulator.debug:
                if self.recog is not None:
                    print("closure at", self.recog.symbolicNames[config.state.ruleIndex], "rule stop", str(config))
                else:
                    print("closure at rule stop", str(config))

            if config.context is None or config.context.hasEmptyPath():
                if config.context is None or config.context.isEmpty():
                    configs.add(config)
                    return True
                else:
                    configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY))
                    currentAltReachedAcceptState = True

            if config.context is not None and not config.context.isEmpty():
                for i in range(0,len(config.context)):
                    if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE:
                        newContext = config.context.getParent(i) # "pop" return state
                        returnState = self.atn.states[config.context.getReturnState(i)]
                        c = LexerATNConfig(state=returnState, config=config, context=newContext)
                        currentAltReachedAcceptState = self.closure(input, c, configs,
                                currentAltReachedAcceptState, speculative, treatEofAsEpsilon)

            return currentAltReachedAcceptState

        # optimization
        if not config.state.epsilonOnlyTransitions:
            if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision:
                configs.add(config)

        for t in config.state.transitions:
            c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
            if c is not None:
                currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)

        return currentAltReachedAcceptState

    # side-effect: can alter configs.hasSemanticContext
    def getEpsilonTarget(self, input:InputStream, config:LexerATNConfig, t:Transition, configs:ATNConfigSet,
                         speculative:bool, treatEofAsEpsilon:bool):
        """Follow one epsilon-like transition *t* from *config*; returns the
        resulting config, or None when the transition cannot be taken."""
        c = None
        if t.serializationType==Transition.RULE:
            # Rule call: push the follow state onto the prediction context.
            newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
            c = LexerATNConfig(state=t.target, config=config, context=newContext)

        elif t.serializationType==Transition.PRECEDENCE:
            raise UnsupportedOperationException("Precedence predicates are not supported in lexers.")

        elif t.serializationType==Transition.PREDICATE:
            # Track traversing semantic predicates. If we traverse,
            # we cannot add a DFA state for this "reach" computation
            # because the DFA would not test the predicate again in the
            # future. Rather than creating collections of semantic predicates
            # like v3 and testing them on prediction, v4 will test them on the
            # fly all the time using the ATN not the DFA. This is slower but
            # semantically it's not used that often. One of the key elements to
            # this predicate mechanism is not adding DFA states that see
            # predicates immediately afterwards in the ATN. For example,
            # a : ID {p1}? | ID {p2}? ;
            # should create the start state for rule 'a' (to save start state
            # competition), but should not create target of ID state. The
            # collection of ATN states the following ID references includes
            # states reached by traversing predicates. Since this is when we
            # test them, we cannot cash the DFA state target of ID.
            if LexerATNSimulator.debug:
                print("EVAL rule "+ str(t.ruleIndex) + ":" + str(t.predIndex))
            configs.hasSemanticContext = True
            if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
                c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType==Transition.ACTION:
            if config.context is None or config.context.hasEmptyPath():
                # execute actions anywhere in the start rule for a token.
                #
                # TODO: if the entry rule is invoked recursively, some
                # actions may be executed during the recursive call. The
                # problem can appear when hasEmptyPath() is true but
                # isEmpty() is false. In this case, the config needs to be
                # split into two contexts - one with just the empty path
                # and another with everything but the empty path.
                # Unfortunately, the current algorithm does not allow
                # getEpsilonTarget to return two configurations, so
                # additional modifications are needed before we can support
                # the split operation.
                lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                                self.atn.lexerActions[t.actionIndex])
                c = LexerATNConfig(state=t.target, config=config, lexerActionExecutor=lexerActionExecutor)

            else:
                # ignore actions in referenced rules
                c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType==Transition.EPSILON:
            c = LexerATNConfig(state=t.target, config=config)

        elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]:
            if treatEofAsEpsilon:
                if t.matches(Token.EOF, 0, self.MAX_CHAR_VALUE):
                    c = LexerATNConfig(state=t.target, config=config)

        return c

    # Evaluate a predicate specified in the lexer.
    #
    # <p>If {@code speculative} is {@code true}, this method was called before
    # {@link #consume} for the matched character. This method should call
    # {@link #consume} before evaluating the predicate to ensure position
    # sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
    # and {@link Lexer#getcolumn}, properly reflect the current
    # lexer state. This method should restore {@code input} and the simulator
    # to the original state before returning (i.e. undo the actions made by the
    # call to {@link #consume}.</p>
    #
    # @param input The input stream.
    # @param ruleIndex The rule containing the predicate.
    # @param predIndex The index of the predicate within the rule.
    # @param speculative {@code true} if the current index in {@code input} is
    # one character before the predicate's location.
    #
    # @return {@code true} if the specified predicate evaluates to
    # {@code true}.
    #/
    def evaluatePredicate(self, input:InputStream, ruleIndex:int, predIndex:int, speculative:bool):
        # assume true if no recognizer was provided
        if self.recog is None:
            return True

        if not speculative:
            return self.recog.sempred(None, ruleIndex, predIndex)

        # Speculative evaluation: consume the pending character so position-
        # sensitive predicates see the right state, then fully undo it.
        savedcolumn = self.column
        savedLine = self.line
        index = input.index
        marker = input.mark()
        try:
            self.consume(input)
            return self.recog.sempred(None, ruleIndex, predIndex)
        finally:
            self.column = savedcolumn
            self.line = savedLine
            input.seek(index)
            input.release(marker)

    def captureSimState(self, settings:SimState, input:InputStream, dfaState:DFAState):
        """Record the current input position and accept DFA state in *settings*."""
        settings.index = input.index
        settings.line = self.line
        settings.column = self.column
        settings.dfaState = dfaState

    def addDFAEdge(self, from_:DFAState, tk:int, to:DFAState=None, cfgs:ATNConfigSet=None) -> DFAState:
        """Add (or suppress) the DFA edge from_ --tk--> to; when *to* is None
        a target state is created from *cfgs* first. Returns the target."""
        if to is None and cfgs is not None:
            # leading to this call, ATNConfigSet.hasSemanticContext is used as a
            # marker indicating dynamic predicate evaluation makes this edge
            # dependent on the specific input sequence, so the static edge in the
            # DFA should be omitted. The target DFAState is still created since
            # execATN has the ability to resynchronize with the DFA state cache
            # following the predicate evaluation step.
            #
            # TJP notes: next time through the DFA, we see a pred again and eval.
            # If that gets us to a previously created (but dangling) DFA
            # state, we can continue in pure DFA mode from there.
            #/
            suppressEdge = cfgs.hasSemanticContext
            cfgs.hasSemanticContext = False

            to = self.addDFAState(cfgs)

            if suppressEdge:
                return to

        # add the edge
        if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE:
            # Only track edges within the DFA bounds
            return to

        if LexerATNSimulator.debug:
            print("EDGE " + str(from_) + " -> " + str(to) + " upon "+ chr(tk))

        if from_.edges is None:
            # make room for tokens 1..n and -1 masquerading as index 0
            from_.edges = [ None ] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1)

        from_.edges[tk - self.MIN_DFA_EDGE] = to # connect

        return to

    # Add a new DFA state if there isn't one with this set of
    # configurations already. This method also detects the first
    # configuration containing an ATN rule stop state. Later, when
    # traversing the DFA, we will know which rule to accept.
    def addDFAState(self, configs:ATNConfigSet) -> DFAState:
        proposed = DFAState(configs=configs)
        firstConfigWithRuleStopState = next((cfg for cfg in configs if isinstance(cfg.state, RuleStopState)), None)

        if firstConfigWithRuleStopState is not None:
            proposed.isAcceptState = True
            proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
            proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]

        dfa = self.decisionToDFA[self.mode]
        existing = dfa.states.get(proposed, None)
        if existing is not None:
            return existing

        newState = proposed
        newState.stateNumber = len(dfa.states)
        configs.setReadonly(True)
        newState.configs = configs
        dfa.states[newState] = newState
        return newState

    def getDFA(self, mode:int):
        """Return the DFA used for the given lexer mode."""
        return self.decisionToDFA[mode]

    # Get the text matched so far for the current token.
    def getText(self, input:InputStream):
        # index is first lookahead char, don't include.
        return input.getText(self.startIndex, input.index-1)

    def consume(self, input:InputStream):
        """Advance the input one character, tracking line/column."""
        curChar = input.LA(1)
        if curChar==ord('\n'):
            self.line += 1
            self.column = 0
        else:
            self.column += 1
        input.consume()

    def getTokenName(self, t:int):
        """Human-readable name for input symbol *t* (for debug output)."""
        if t==-1:
            return "EOF"
        else:
            return "'" + chr(t) + "'"
# Shared "no viable transition" sentinel state; created after the class body
# because DFAState must exist first.
LexerATNSimulator.ERROR = DFAState(0x7FFFFFFF, ATNConfigSet())

# Remove the module-level forward-declaration dummy (the real Lexer is
# imported locally inside __init__).
del Lexer
|