Spaces:
Paused
Paused
| """Module for reading TFM (TeX Font Metrics) files. | |
| The TFM format is described in the TFtoPL WEB source code, whose typeset form | |
| can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_. | |
| >>> from fontTools.tfmLib import TFM | |
| >>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm") | |
| >>> | |
| >>> # Accessing an attribute gets you metadata. | |
| >>> tfm.checksum | |
| 1274110073 | |
| >>> tfm.designsize | |
| 10.0 | |
| >>> tfm.codingscheme | |
| 'TeX text' | |
| >>> tfm.family | |
| 'CMR' | |
| >>> tfm.seven_bit_safe_flag | |
| False | |
| >>> tfm.face | |
| 234 | |
| >>> tfm.extraheader | |
| {} | |
| >>> tfm.fontdimens | |
| {'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578} | |
| >>> # Accessing a character gets you its metrics. | |
| >>> # “width” is always available, other metrics are available only when | |
| >>> # applicable. All values are relative to “designsize”. | |
| >>> tfm.chars[ord("g")] | |
| {'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219} | |
| >>> # Kerning and ligature can be accessed as well. | |
| >>> tfm.kerning[ord("c")] | |
| {104: -0.02777862548828125, 107: -0.02777862548828125} | |
| >>> tfm.ligatures[ord("f")] | |
| {105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)} | |
| """ | |
| from types import SimpleNamespace | |
| from fontTools.misc.sstruct import calcsize, unpack, unpack2 | |
# Layout of the twelve 16-bit size fields that open a TFM file, written in
# fontTools.misc.sstruct notation. TFM._read unpacks this preamble first and
# derives every sub-table offset from it. All lengths are counted in 4-byte
# words, not bytes.
SIZES_FORMAT = """
>
lf: h # length of the entire file, in words
lh: h # length of the header data, in words
bc: h # smallest character code in the font
ec: h # largest character code in the font
nw: h # number of words in the width table
nh: h # number of words in the height table
nd: h # number of words in the depth table
ni: h # number of words in the italic correction table
nl: h # number of words in the ligature/kern table
nk: h # number of words in the kern table
ne: h # number of words in the extensible character table
np: h # number of font parameter words
"""

# Size of the preamble described by SIZES_FORMAT; input shorter than this is
# rejected by TFM._read before any unpacking is attempted.
SIZES_SIZE = calcsize(SIZES_FORMAT)
# sstruct element spec used for every fixed-point quantity in the file
# (designsize here, and all values read through TFM._read's read_fixed helper).
FIXED_FORMAT = "12.20F"

# The header formats are cumulative: each one embeds the previous format text
# and appends more fields. TFM._read picks the largest format that fits in the
# header length announced by the file.
HEADER_FORMAT1 = f"""
>
checksum: L
designsize: {FIXED_FORMAT}
"""

HEADER_FORMAT2 = f"""
{HEADER_FORMAT1}
codingscheme: 40p
"""

HEADER_FORMAT3 = f"""
{HEADER_FORMAT2}
family: 20p
"""

HEADER_FORMAT4 = f"""
{HEADER_FORMAT3}
seven_bit_safe_flag: ?
ignored: x
ignored: x
face: B
"""

# Sizes of the four header layouts, compared against the header length
# (lh * 4) in TFM._read to decide which layout to unpack.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)
# Layout of one entry of the ligature/kern table: four unsigned bytes.
# TFM._read unpacks each entry into a namespace with these attribute names
# and interprets skip_byte against STOP_FLAG and op_byte against KERN_FLAG.
LIG_KERN_COMMAND = """
>
skip_byte: B
next_char: B
op_byte: B
remainder: B
"""
# Names for font parameters 1-7 (zero-based index 0..6); TFM._read applies
# them regardless of the font type.
BASE_PARAMS = [
    "SLANT", "SPACE", "STRETCH", "SHRINK",
    "XHEIGHT", "QUAD", "EXTRASPACE",
]

# Names for parameters 8-22 (zero-based index 7..21); used only when the
# font type is MATHSY.
MATHSY_PARAMS = [
    "NUM1", "NUM2", "NUM3",
    "DENOM1", "DENOM2",
    "SUP1", "SUP2", "SUP3",
    "SUB1", "SUB2",
    "SUPDROP", "SUBDROP",
    "DELIM1", "DELIM2",
    "AXISHEIGHT",
]

# Names for parameters 8-13 (zero-based index 7..12); used only when the
# font type is MATHEX.
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1", "BIGOPSPACING2", "BIGOPSPACING3",
    "BIGOPSPACING4", "BIGOPSPACING5",
]
# Font types stored in TFM.fonttype; TFM._read selects one from the coding
# scheme string.
VANILLA, MATHSY, MATHEX = range(3)

# Not referenced by the code visible in this file.
# NOTE(review): presumably the lig/kern reachability states used by TFtoPL —
# confirm before relying on them.
UNREACHABLE, PASSTHROUGH, ACCESSABLE = range(3)

# Values of the two-bit tag TFM._read extracts from each character's info
# word (third byte modulo 4).
NO_TAG, LIG_TAG, LIST_TAG, EXT_TAG = range(4)

# Thresholds applied to a lig/kern command: skip_byte is compared against
# STOP_FLAG and op_byte against KERN_FLAG.
STOP_FLAG = 0x80
KERN_FLAG = 0x80
class TFMException(Exception):
    """Raised by TFM when the input fails a structural sanity check."""

    def __init__(self, message: str) -> None:
        # A message is mandatory; it becomes the sole exception argument.
        super().__init__(message)
class TFM:
    """Parsed contents of a TFM (TeX Font Metrics) file.

    After construction the instance exposes:

    * header fields unpacked from the file: ``checksum``, ``designsize``,
      ``codingscheme``, ``family``, ``seven_bit_safe_flag``, ``face`` and
      ``extraheader`` (a dict of any header words past the standard ones);
    * ``fonttype``: one of ``VANILLA``, ``MATHSY`` or ``MATHEX``;
    * ``fontdimens``: dict mapping parameter names to values;
    * ``chars``: dict mapping character codes to per-character metric dicts;
    * ``kerning`` and ``ligatures``: dicts keyed by the left character code;
    * ``right_boundary_char`` and ``left_boundary_char``: boundary character
      codes, or ``None`` when the file declares none.
    """

    def __init__(self, file):
        """Read and parse *file*.

        Args:
            file: Either an object with a ``read`` method that returns the
                raw file contents, or a path that ``open`` accepts.

        Raises:
            TFMException: If the data fails the structural sanity checks.
        """
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Load *file* and populate every public attribute of the instance.

        Args:
            file: Same as for ``__init__``.

        Raises:
            TFMException: If the input is too short or the sizes declared
                in its preamble are inconsistent.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # “errors” in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # The message covers zero as well, so the test must too; a zero
        # length could never satisfy the checks further down anyway.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                # Must be an f-string, otherwise the literal text "{name}"
                # ends up in the message.
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            # "ne" only exists as an attribute of "sizes"; a bare name here
            # would raise NameError instead of the intended TFMException.
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper functions below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. If this looks
        # nonidiomatic Python, I blame the effect of reading the literate WEB
        # documentation of TFtoPL.
        #
        # Helpers returning "4 * (...)" yield byte offsets into ``data``;
        # the *_index helpers return small integers decoded from the four
        # bytes of a character's info word.
        def char_info(c):
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # Outside the declared code range, or declared with width index 0.
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            # High nibble of the second byte.
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            # Low nibble of the second byte.
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            # Upper six bits of the third byte.
            return data[char_info(c) + 2] // 4

        def tag(c):
            # Lower two bits of the third byte.
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # ``i`` is a byte offset, normally produced by lig_step().
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Unpack one fixed-point value at byte offset ``index`` and store
            # it under ``key`` in ``obj``; returns the object holding it.
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the six-word (24-byte) preamble.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Small face bytes are turned into a three-letter code.
                # NOTE(review): presumably weight/slope/expansion as in
                # TFtoPL — confirm.
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Any header words beyond the standard ones are kept verbatim
            # as HEADER18, HEADER19, ...
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        # Parameters without a known name fall back to PARAMETERn (1-based).
        self.fontdimens = {}
        for i in range(sizes.np):
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of its first lig/kern command.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first command declares a right
            # boundary character; in the last command it declares the start
            # of the left boundary program, recorded under pseudo-code 256.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            cmd = lig_kern_command(lig_step((sizes.nl - 1)))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                # NO_TAG needs no action.
                if char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    info["varchar"] = varchar = {}
                    for i, name in enumerate(("top", "mid", "bot", "rep")):
                        part = data[exten(c) + i]
                        # The fourth piece ("rep") is always recorded; the
                        # others only when non-zero.
                        if i == 3 or part > 0:
                            # A piece naming a nonexistent character is
                            # replaced by the character itself.
                            varchar[name] = c if nonexistent(part) else part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                # Indirect start: the real program begins at this index.
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                # Commands whose skip_byte exceeds STOP_FLAG contribute
                # nothing and only terminate the program below.
                if cmd.skip_byte <= STOP_FLAG:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            # Build the mnemonic: optional leading "/",
                            # "LIG", optional trailing "/", then one ">"
                            # for every 4 contained in the op code.
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1
if __name__ == "__main__":
    import sys

    # Parse the TFM file named by the first command-line argument.
    tfm = TFM(sys.argv[1])

    # Attributes to dump, one "tfm.<name>=<value>" line each, in this order.
    dumped_attributes = (
        "checksum",
        "designsize",
        "codingscheme",
        "fonttype",
        "family",
        "seven_bit_safe_flag",
        "face",
        "extraheader",
        "fontdimens",
        "right_boundary_char",
        "left_boundary_char",
        "kerning",
        "ligatures",
        "chars",
    )
    report = "\n".join(
        f"tfm.{attr}={getattr(tfm, attr)}" for attr in dumped_attributes
    )
    print(report)

    # Finish with the one-line summary produced by TFM.__repr__.
    print(tfm)