from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
    PSOperators,
    ps_StandardEncoding,
    ps_array,
    ps_boolean,
    ps_dict,
    ps_integer,
    ps_literal,
    ps_mark,
    ps_name,
    ps_operator,
    ps_procedure,
    ps_procmark,
    ps_real,
    ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)

ps_special = b"()<>[]{}%"

skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")

stringPat = rb"""
    \(
        (
            (
                [^()]* \ [()]
            )
            |
            (
                [^()]* \( [^()]* \)
            )
        )*
        [^()]*
    \)
"""
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))


class PSTokenError(Exception):
    pass


class PSError(Exception):
    pass


class PSTokenizer(object):
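    """Tokenizer for PostScript data held in memory as bytes.

    getnexttoken() returns one token at a time, together with the name of
    the interpreter method that should handle it.
    """
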
    def __init__(self, buf=b"", encoding="ascii"):
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
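        """Return the next (tokentype, token) pair, or (None, None) at EOF.

        'tokentype' is the name of the interpreter method that handles the
        token ("do_string", "do_literal", "do_special", ...), or an empty
        string for plain name and number tokens; 'token' is the token text
        decoded to a str.  The keyword parameters only pre-bind globals as
        locals for speed and are not meant to be passed by callers.
        """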
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                tokentype = "do_literal"
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
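        """Switch to the eexec-encrypted part of the data.

        The remainder of the buffer is decrypted with the standard eexec key
        (55665) and tokenizing resumes after the four random bytes that lead
        the decrypted data.
        """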
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf


class PSInterpreter(PSOperators):
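    """A minimal PostScript interpreter, sufficient for parsing Type 1 font
    programs.

    Every method named ps_<name> (defined here or inherited from
    PSOperators) is installed in the system dictionary as the operator
    <name>.
    """
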
    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
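        """Tokenize and execute 'data', a PostScript program given as bytes.

        On error, the buffer context around the failing position is logged
        before the exception is re-raised.
        """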
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            if self.tokenizer is not None:
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50],
                )
            raise

    def handle_object(self, object):
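        """Execute or push a freshly parsed object.

        While a procedure is being built (proclevel > 0) every object is
        simply pushed.  Otherwise executable names are looked up through the
        dict stack and the result is executed: operators are called,
        procedures are run, and literal objects end up on the operand stack.
        """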
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        dictstack = self.dictstack
        for i in range(len(dictstack) - 1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError("name error: " + str(name))

    def do_token(
        self,
        token,
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
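        """Convert a plain token into a PostScript object.

        Tries an integer first, then a real, then radix notation
        (base#digits); anything else becomes an executable name.
        """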
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if "#" in token:
                    hashpos = token.find("#")
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos + 1 :], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        pass

    def do_literal(self, token):
        return ps_literal(token[1:])

    def do_string(self, token):
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + "0"
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i : i + 2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("huh?")

    def push(self, object):
        self.stack.append(object)

    def pop(self, *types):
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), object.type)
                )
        del stack[-1]
        return object

    def do_makearray(self):
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack


def unpack_item(item):
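    """Recursively convert a PostScript object into plain Python values.

    Dictionaries become dicts, arrays become lists, procedures become
    tuples, and everything else is reduced to its raw value.
    """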
    tp = type(item.value)
    if tp == dict:
        newitem = {}
        for key, value in item.value.items():
            newitem[key] = unpack_item(value)
    elif tp == list:
        newitem = [None] * len(item.value)
        for i in range(len(item.value)):
            newitem[i] = unpack_item(item.value[i])
        if item.type == "proceduretype":
            newitem = tuple(newitem)
    else:
        newitem = item.value
    return newitem


def suckfont(data, encoding="ascii"):
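    """Interpret Type 1 font program 'data' and return the font as a nested
    structure of plain Python objects.

    The font is looked up in FontDirectory under the name found in the
    /FontName entry of the source; failing that, another font defined by
    the program is returned (the dummy Helvetica entry is skipped).
    """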
    m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    if m:
        fontName = m.group(1)
        fontName = fontName.decode()
    else:
        fontName = None
    interpreter = PSInterpreter(encoding=encoding)
    interpreter.interpret(
        b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
    )
    interpreter.interpret(data)
    fontdir = interpreter.dictstack[0]["FontDirectory"].value
    if fontName in fontdir:
        rawfont = fontdir[fontName]
    else:
        # fall back, in case fontName wasn't found in FontDirectory
        fontNames = list(fontdir.keys())
        if len(fontNames) > 1:
            fontNames.remove("Helvetica")
        fontNames.sort()
        rawfont = fontdir[fontNames[0]]
    interpreter.close()
    return unpack_item(rawfont)
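

if __name__ == "__main__":
    # Minimal usage sketch (the PostScript snippet below is illustrative,
    # not from a real font): run the interpreter over a tiny program and
    # unpack the result into plain Python values.  Real Type 1 font data
    # would normally go through suckfont() instead.
    interp = PSInterpreter()
    interp.interpret(b"/FontMatrix [0.001 0 0 0.001 0 0] def")
    fontMatrix = interp.dictstack[1]["FontMatrix"]
    print(unpack_item(fontMatrix))  # [0.001, 0, 0, 0.001, 0, 0]
    interp.close()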