Spaces:
Running
Running
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""
# Python imports | |
import pickle | |
# Local imports | |
from . import token | |
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, empty on a fresh instance.
                     NOTE(review): presumably maps symbol names to arc
                     labels; confirm against the subclasses that fill
                     in the tables.

    """

    def __init__(self):
        """Create a grammar whose tables are empty apart from label 0."""
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is pre-populated with the "EMPTY" entry.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the smallest symbol number (anything lower is a token).
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        The pickle is written to a scratch file next to *filename* and
        then moved into place with os.replace().  A failure while
        pickling therefore never leaves a truncated file at *filename*
        and never destroys a previously written one.  This requires
        write permission on the directory containing *filename*.

        Any exception raised while writing or renaming propagates to
        the caller after the scratch file has been removed.
        """
        import os

        target = os.fsdecode(filename)
        # The pid keeps concurrent writers in different processes from
        # sharing a scratch file.
        scratch = "%s.%d.tmp" % (target, os.getpid())
        try:
            with open(scratch, "wb") as f:
                pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
            os.replace(scratch, target)
        except BaseException:
            # Best-effort cleanup; the scratch file may not exist if
            # open() itself was what failed.
            try:
                os.unlink(scratch)
            except OSError:
                pass
            raise

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The pickled dict is merged into this instance's attributes, so
        attributes absent from the pickle keep their current values.
        """
        # SECURITY: unpickling can execute arbitrary code.  Only load
        # files that were produced by dump() and come from a trusted
        # location.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def loads(self, pkl):
        """Load the grammar tables from a pickle bytes object.

        The pickled dict is merged into this instance's attributes.
        """
        # SECURITY: same caveat as load() -- never pass untrusted bytes.
        self.__dict__.update(pickle.loads(pkl))

    def copy(self):
        """Copy the grammar.

        Returns a new instance of the same class.  The copy is shallow:
        each table container is duplicated, but the objects stored in
        the tables (DFAs, label tuples, ...) are shared with the
        original.
        """
        new = self.__class__()
        for dict_attr in (
            "symbol2number",
            "number2symbol",
            "dfas",
            "keywords",
            "tokens",
            "symbol2label",
        ):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint

        sections = (
            ("s2n", self.symbol2number),
            ("n2s", self.number2symbol),
            ("states", self.states),
            ("dfas", self.dfas),
            ("labels", self.labels),
        )
        for heading, table in sections:
            print(heading)
            pprint(table)
        print("start", self.start)
# Operator text -> token number.  tokenize reports every operator under
# the generic OP code, so the real token code is looked up here.  Each
# non-empty line of the table is "<operator> <name in the token module>".
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
"""

# Skip the blank lines produced by the leading/trailing newline of the
# literal; every remaining row must split into exactly two fields.
opmap = {
    symbol: getattr(token, token_name)
    for symbol, token_name in (
        row.split() for row in opmap_raw.splitlines() if row
    )
}