Spaces:
Sleeping
Sleeping
import logging | |
import os | |
from collections import defaultdict, namedtuple | |
from functools import reduce | |
from itertools import chain | |
from math import log2 | |
from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple | |
from fontTools.config import OPTIONS | |
from fontTools.misc.intTools import bit_count, bit_indices | |
from fontTools.ttLib import TTFont | |
from fontTools.ttLib.tables import otBase, otTables | |
log = logging.getLogger(__name__) | |
COMPRESSION_LEVEL = OPTIONS[f"{__name__}:COMPRESSION_LEVEL"] | |
# Kept because ufo2ft depends on it, to be removed once ufo2ft uses the config instead | |
# https://github.com/fonttools/fonttools/issues/2592 | |
GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE" | |
GPOS_COMPACT_MODE_DEFAULT = str(COMPRESSION_LEVEL.default) | |
def _compression_level_from_env() -> int: | |
env_level = GPOS_COMPACT_MODE_DEFAULT | |
if GPOS_COMPACT_MODE_ENV_KEY in os.environ: | |
import warnings | |
warnings.warn( | |
f"'{GPOS_COMPACT_MODE_ENV_KEY}' environment variable is deprecated. " | |
"Please set the 'fontTools.otlLib.optimize.gpos:COMPRESSION_LEVEL' option " | |
"in TTFont.cfg.", | |
DeprecationWarning, | |
) | |
env_level = os.environ[GPOS_COMPACT_MODE_ENV_KEY] | |
if len(env_level) == 1 and env_level in "0123456789": | |
return int(env_level) | |
raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={env_level}") | |
def compact(font: TTFont, level: int) -> TTFont: | |
# Ideal plan: | |
# 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable | |
# https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable | |
# 2. Extract glyph-glyph kerning and class-kerning from all present subtables | |
# 3. Regroup into different subtable arrangements | |
# 4. Put back into the lookup | |
# | |
# Actual implementation: | |
# 2. Only class kerning is optimized currently | |
# 3. If the input kerning is already in several subtables, the subtables | |
# are not grouped together first; instead each subtable is treated | |
# independently, so currently this step is: | |
# Split existing subtables into more smaller subtables | |
gpos = font["GPOS"] | |
for lookup in gpos.table.LookupList.Lookup: | |
if lookup.LookupType == 2: | |
compact_lookup(font, level, lookup) | |
elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2: | |
compact_ext_lookup(font, level, lookup) | |
return font | |
def compact_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None: | |
new_subtables = compact_pair_pos(font, level, lookup.SubTable) | |
lookup.SubTable = new_subtables | |
lookup.SubTableCount = len(new_subtables) | |
def compact_ext_lookup(font: TTFont, level: int, lookup: otTables.Lookup) -> None: | |
new_subtables = compact_pair_pos( | |
font, level, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable] | |
) | |
new_ext_subtables = [] | |
for subtable in new_subtables: | |
ext_subtable = otTables.ExtensionPos() | |
ext_subtable.Format = 1 | |
ext_subtable.ExtSubTable = subtable | |
new_ext_subtables.append(ext_subtable) | |
lookup.SubTable = new_ext_subtables | |
lookup.SubTableCount = len(new_ext_subtables) | |
def compact_pair_pos( | |
font: TTFont, level: int, subtables: Sequence[otTables.PairPos] | |
) -> Sequence[otTables.PairPos]: | |
new_subtables = [] | |
for subtable in subtables: | |
if subtable.Format == 1: | |
# Not doing anything to Format 1 (yet?) | |
new_subtables.append(subtable) | |
elif subtable.Format == 2: | |
new_subtables.extend(compact_class_pairs(font, level, subtable)) | |
return new_subtables | |
def compact_class_pairs( | |
font: TTFont, level: int, subtable: otTables.PairPos | |
) -> List[otTables.PairPos]: | |
from fontTools.otlLib.builder import buildPairPosClassesSubtable | |
subtables = [] | |
classes1: DefaultDict[int, List[str]] = defaultdict(list) | |
for g in subtable.Coverage.glyphs: | |
classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g) | |
classes2: DefaultDict[int, List[str]] = defaultdict(list) | |
for g, i in subtable.ClassDef2.classDefs.items(): | |
classes2[i].append(g) | |
all_pairs = {} | |
for i, class1 in enumerate(subtable.Class1Record): | |
for j, class2 in enumerate(class1.Class2Record): | |
if is_really_zero(class2): | |
continue | |
all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = ( | |
getattr(class2, "Value1", None), | |
getattr(class2, "Value2", None), | |
) | |
grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost(font, all_pairs, level) | |
for pairs in grouped_pairs: | |
subtables.append(buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap())) | |
return subtables | |
def is_really_zero(class2: otTables.Class2Record) -> bool: | |
v1 = getattr(class2, "Value1", None) | |
v2 = getattr(class2, "Value2", None) | |
return (v1 is None or v1.getEffectiveFormat() == 0) and ( | |
v2 is None or v2.getEffectiveFormat() == 0 | |
) | |
Pairs = Dict[ | |
Tuple[Tuple[str, ...], Tuple[str, ...]], | |
Tuple[otBase.ValueRecord, otBase.ValueRecord], | |
] | |
# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958 | |
def _getClassRanges(glyphIDs: Iterable[int]): | |
glyphIDs = sorted(glyphIDs) | |
last = glyphIDs[0] | |
ranges = [[last]] | |
for glyphID in glyphIDs[1:]: | |
if glyphID != last + 1: | |
ranges[-1].append(last) | |
ranges.append([glyphID]) | |
last = glyphID | |
ranges[-1].append(last) | |
return ranges, glyphIDs[0], glyphIDs[-1] | |
# Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989 | |
def _classDef_bytes( | |
class_data: List[Tuple[List[Tuple[int, int]], int, int]], | |
class_ids: List[int], | |
coverage=False, | |
): | |
if not class_ids: | |
return 0 | |
first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]] | |
range_count = len(first_ranges) | |
for i in class_ids[1:]: | |
data = class_data[i] | |
range_count += len(data[0]) | |
min_glyph_id = min(min_glyph_id, data[1]) | |
max_glyph_id = max(max_glyph_id, data[2]) | |
glyphCount = max_glyph_id - min_glyph_id + 1 | |
# https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1 | |
format1_bytes = 6 + glyphCount * 2 | |
# https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2 | |
format2_bytes = 4 + range_count * 6 | |
return min(format1_bytes, format2_bytes) | |
ClusteringContext = namedtuple( | |
"ClusteringContext", | |
[ | |
"lines", | |
"all_class1", | |
"all_class1_data", | |
"all_class2_data", | |
"valueFormat1_bytes", | |
"valueFormat2_bytes", | |
], | |
) | |
class Cluster: | |
# TODO(Python 3.7): Turn this into a dataclass | |
# ctx: ClusteringContext | |
# indices: int | |
# Caches | |
# TODO(Python 3.8): use functools.cached_property instead of the | |
# manually cached properties, and remove the cache fields listed below. | |
# _indices: Optional[List[int]] = None | |
# _column_indices: Optional[List[int]] = None | |
# _cost: Optional[int] = None | |
__slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost" | |
def __init__(self, ctx: ClusteringContext, indices_bitmask: int): | |
self.ctx = ctx | |
self.indices_bitmask = indices_bitmask | |
self._indices = None | |
self._column_indices = None | |
self._cost = None | |
def indices(self): | |
if self._indices is None: | |
self._indices = bit_indices(self.indices_bitmask) | |
return self._indices | |
def column_indices(self): | |
if self._column_indices is None: | |
# Indices of columns that have a 1 in at least 1 line | |
# => binary OR all the lines | |
bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices)) | |
self._column_indices = bit_indices(bitmask) | |
return self._column_indices | |
def width(self): | |
# Add 1 because Class2=0 cannot be used but needs to be encoded. | |
return len(self.column_indices) + 1 | |
def cost(self): | |
if self._cost is None: | |
self._cost = ( | |
# 2 bytes to store the offset to this subtable in the Lookup table above | |
2 | |
# Contents of the subtable | |
# From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment | |
# uint16 posFormat Format identifier: format = 2 | |
+ 2 | |
# Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable. | |
+ 2 | |
+ self.coverage_bytes | |
# uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero). | |
+ 2 | |
# uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero). | |
+ 2 | |
# Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair. | |
+ 2 | |
+ self.classDef1_bytes | |
# Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair. | |
+ 2 | |
+ self.classDef2_bytes | |
# uint16 class1Count Number of classes in classDef1 table — includes Class 0. | |
+ 2 | |
# uint16 class2Count Number of classes in classDef2 table — includes Class 0. | |
+ 2 | |
# Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1. | |
+ (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes) | |
* len(self.indices) | |
* self.width | |
) | |
return self._cost | |
def coverage_bytes(self): | |
format1_bytes = ( | |
# From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1 | |
# uint16 coverageFormat Format identifier — format = 1 | |
# uint16 glyphCount Number of glyphs in the glyph array | |
4 | |
# uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order | |
+ sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2 | |
) | |
ranges = sorted( | |
chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices) | |
) | |
merged_range_count = 0 | |
last = None | |
for start, end in ranges: | |
if last is not None and start != last + 1: | |
merged_range_count += 1 | |
last = end | |
format2_bytes = ( | |
# From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2 | |
# uint16 coverageFormat Format identifier — format = 2 | |
# uint16 rangeCount Number of RangeRecords | |
4 | |
# RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID. | |
# uint16 startGlyphID First glyph ID in the range | |
# uint16 endGlyphID Last glyph ID in the range | |
# uint16 startCoverageIndex Coverage Index of first glyph ID in range | |
+ merged_range_count * 6 | |
) | |
return min(format1_bytes, format2_bytes) | |
def classDef1_bytes(self): | |
# We can skip encoding one of the Class1 definitions, and use | |
# Class1=0 to represent it instead, because Class1 is gated by the | |
# Coverage definition. Use Class1=0 for the highest byte savings. | |
# Going through all options takes too long, pick the biggest class | |
# = what happens in otlLib.builder.ClassDefBuilder.classes() | |
biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i])) | |
return _classDef_bytes( | |
self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index] | |
) | |
def classDef2_bytes(self): | |
# All Class2 need to be encoded because we can't use Class2=0 | |
return _classDef_bytes(self.ctx.all_class2_data, self.column_indices) | |
def cluster_pairs_by_class2_coverage_custom_cost( | |
font: TTFont, | |
pairs: Pairs, | |
compression: int = 5, | |
) -> List[Pairs]: | |
if not pairs: | |
# The subtable was actually empty? | |
return [pairs] | |
# Sorted for reproducibility/determinism | |
all_class1 = sorted(set(pair[0] for pair in pairs)) | |
all_class2 = sorted(set(pair[1] for pair in pairs)) | |
# Use Python's big ints for binary vectors representing each line | |
lines = [ | |
sum( | |
1 << i if (class1, class2) in pairs else 0 | |
for i, class2 in enumerate(all_class2) | |
) | |
for class1 in all_class1 | |
] | |
# Map glyph names to ids and work with ints throughout for ClassDef formats | |
name_to_id = font.getReverseGlyphMap() | |
# Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id) | |
all_class1_data = [ | |
_getClassRanges(name_to_id[name] for name in cls) for cls in all_class1 | |
] | |
all_class2_data = [ | |
_getClassRanges(name_to_id[name] for name in cls) for cls in all_class2 | |
] | |
format1 = 0 | |
format2 = 0 | |
for pair, value in pairs.items(): | |
format1 |= value[0].getEffectiveFormat() if value[0] else 0 | |
format2 |= value[1].getEffectiveFormat() if value[1] else 0 | |
valueFormat1_bytes = bit_count(format1) * 2 | |
valueFormat2_bytes = bit_count(format2) * 2 | |
ctx = ClusteringContext( | |
lines, | |
all_class1, | |
all_class1_data, | |
all_class2_data, | |
valueFormat1_bytes, | |
valueFormat2_bytes, | |
) | |
cluster_cache: Dict[int, Cluster] = {} | |
def make_cluster(indices: int) -> Cluster: | |
cluster = cluster_cache.get(indices, None) | |
if cluster is not None: | |
return cluster | |
cluster = Cluster(ctx, indices) | |
cluster_cache[indices] = cluster | |
return cluster | |
def merge(cluster: Cluster, other: Cluster) -> Cluster: | |
return make_cluster(cluster.indices_bitmask | other.indices_bitmask) | |
# Agglomerative clustering by hand, checking the cost gain of the new | |
# cluster against the previously separate clusters | |
# Start with 1 cluster per line | |
# cluster = set of lines = new subtable | |
clusters = [make_cluster(1 << i) for i in range(len(lines))] | |
# Cost of 1 cluster with everything | |
# `(1 << len) - 1` gives a bitmask full of 1's of length `len` | |
cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost | |
log.debug(f" len(clusters) = {len(clusters)}") | |
while len(clusters) > 1: | |
lowest_cost_change = None | |
best_cluster_index = None | |
best_other_index = None | |
best_merged = None | |
for i, cluster in enumerate(clusters): | |
for j, other in enumerate(clusters[i + 1 :]): | |
merged = merge(cluster, other) | |
cost_change = merged.cost - cluster.cost - other.cost | |
if lowest_cost_change is None or cost_change < lowest_cost_change: | |
lowest_cost_change = cost_change | |
best_cluster_index = i | |
best_other_index = i + 1 + j | |
best_merged = merged | |
assert lowest_cost_change is not None | |
assert best_cluster_index is not None | |
assert best_other_index is not None | |
assert best_merged is not None | |
# If the best merge we found is still taking down the file size, then | |
# there's no question: we must do it, because it's beneficial in both | |
# ways (lower file size and lower number of subtables). However, if the | |
# best merge we found is not reducing file size anymore, then we need to | |
# look at the other stop criteria = the compression factor. | |
if lowest_cost_change > 0: | |
# Stop critera: check whether we should keep merging. | |
# Compute size reduction brought by splitting | |
cost_after_splitting = sum(c.cost for c in clusters) | |
# size_reduction so that after = before * (1 - size_reduction) | |
# E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2 | |
size_reduction = 1 - cost_after_splitting / cost_before_splitting | |
# Force more merging by taking into account the compression number. | |
# Target behaviour: compression number = 1 to 9, default 5 like gzip | |
# - 1 = accept to add 1 subtable to reduce size by 50% | |
# - 5 = accept to add 5 subtables to reduce size by 50% | |
# See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691 | |
# Given the size reduction we have achieved so far, compute how many | |
# new subtables are acceptable. | |
max_new_subtables = -log2(1 - size_reduction) * compression | |
log.debug( | |
f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}", | |
) | |
if compression == 9: | |
# Override level 9 to mean: create any number of subtables | |
max_new_subtables = len(clusters) | |
# If we have managed to take the number of new subtables below the | |
# threshold, then we can stop. | |
if len(clusters) <= max_new_subtables + 1: | |
break | |
# No reason to stop yet, do the merge and move on to the next. | |
del clusters[best_other_index] | |
clusters[best_cluster_index] = best_merged | |
# All clusters are final; turn bitmasks back into the "Pairs" format | |
pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict) | |
for pair, values in pairs.items(): | |
pairs_by_class1[pair[0]][pair] = values | |
pairs_groups: List[Pairs] = [] | |
for cluster in clusters: | |
pairs_group: Pairs = dict() | |
for i in cluster.indices: | |
class1 = all_class1[i] | |
pairs_group.update(pairs_by_class1[class1]) | |
pairs_groups.append(pairs_group) | |
return pairs_groups | |