Spaces:
Sleeping
Sleeping
from fontTools.misc.roundTools import noRound, otRound | |
from fontTools.misc.intTools import bit_count | |
from fontTools.ttLib.tables import otTables as ot | |
from fontTools.varLib.models import supportScalar | |
from fontTools.varLib.builder import ( | |
buildVarRegionList, | |
buildVarStore, | |
buildVarRegion, | |
buildVarData, | |
) | |
from functools import partial | |
from collections import defaultdict | |
from heapq import heappush, heappop | |
NO_VARIATION_INDEX = ot.NO_VARIATION_INDEX | |
ot.VarStore.NO_VARIATION_INDEX = NO_VARIATION_INDEX | |
def _getLocationKey(loc): | |
return tuple(sorted(loc.items(), key=lambda kv: kv[0])) | |
class OnlineVarStoreBuilder(object): | |
def __init__(self, axisTags): | |
self._axisTags = axisTags | |
self._regionMap = {} | |
self._regionList = buildVarRegionList([], axisTags) | |
self._store = buildVarStore(self._regionList, []) | |
self._data = None | |
self._model = None | |
self._supports = None | |
self._varDataIndices = {} | |
self._varDataCaches = {} | |
self._cache = None | |
def setModel(self, model): | |
self.setSupports(model.supports) | |
self._model = model | |
def setSupports(self, supports): | |
self._model = None | |
self._supports = list(supports) | |
if not self._supports[0]: | |
del self._supports[0] # Drop base master support | |
self._cache = None | |
self._data = None | |
def finish(self, optimize=True): | |
self._regionList.RegionCount = len(self._regionList.Region) | |
self._store.VarDataCount = len(self._store.VarData) | |
for data in self._store.VarData: | |
data.ItemCount = len(data.Item) | |
data.calculateNumShorts(optimize=optimize) | |
return self._store | |
def _add_VarData(self, num_items=1): | |
regionMap = self._regionMap | |
regionList = self._regionList | |
regions = self._supports | |
regionIndices = [] | |
for region in regions: | |
key = _getLocationKey(region) | |
idx = regionMap.get(key) | |
if idx is None: | |
varRegion = buildVarRegion(region, self._axisTags) | |
idx = regionMap[key] = len(regionList.Region) | |
regionList.Region.append(varRegion) | |
regionIndices.append(idx) | |
# Check if we have one already... | |
key = tuple(regionIndices) | |
varDataIdx = self._varDataIndices.get(key) | |
if varDataIdx is not None: | |
self._outer = varDataIdx | |
self._data = self._store.VarData[varDataIdx] | |
self._cache = self._varDataCaches[key] | |
if len(self._data.Item) + num_items > 0xFFFF: | |
# This is full. Need new one. | |
varDataIdx = None | |
if varDataIdx is None: | |
self._data = buildVarData(regionIndices, [], optimize=False) | |
self._outer = len(self._store.VarData) | |
self._store.VarData.append(self._data) | |
self._varDataIndices[key] = self._outer | |
if key not in self._varDataCaches: | |
self._varDataCaches[key] = {} | |
self._cache = self._varDataCaches[key] | |
def storeMasters(self, master_values, *, round=round): | |
deltas = self._model.getDeltas(master_values, round=round) | |
base = deltas.pop(0) | |
return base, self.storeDeltas(deltas, round=noRound) | |
def storeMastersMany(self, master_values_list, *, round=round): | |
deltas_list = [ | |
self._model.getDeltas(master_values, round=round) | |
for master_values in master_values_list | |
] | |
base_list = [deltas.pop(0) for deltas in deltas_list] | |
return base_list, self.storeDeltasMany(deltas_list, round=noRound) | |
def storeDeltas(self, deltas, *, round=round): | |
deltas = [round(d) for d in deltas] | |
if len(deltas) == len(self._supports) + 1: | |
deltas = tuple(deltas[1:]) | |
else: | |
assert len(deltas) == len(self._supports) | |
deltas = tuple(deltas) | |
if not self._data: | |
self._add_VarData() | |
varIdx = self._cache.get(deltas) | |
if varIdx is not None: | |
return varIdx | |
inner = len(self._data.Item) | |
if inner == 0xFFFF: | |
# Full array. Start new one. | |
self._add_VarData() | |
return self.storeDeltas(deltas, round=noRound) | |
self._data.addItem(deltas, round=noRound) | |
varIdx = (self._outer << 16) + inner | |
self._cache[deltas] = varIdx | |
return varIdx | |
def storeDeltasMany(self, deltas_list, *, round=round): | |
deltas_list = [[round(d) for d in deltas] for deltas in deltas_list] | |
deltas_list = tuple(tuple(deltas) for deltas in deltas_list) | |
if not self._data: | |
self._add_VarData(len(deltas_list)) | |
varIdx = self._cache.get(deltas_list) | |
if varIdx is not None: | |
return varIdx | |
inner = len(self._data.Item) | |
if inner + len(deltas_list) > 0xFFFF: | |
# Full array. Start new one. | |
self._add_VarData(len(deltas_list)) | |
return self.storeDeltasMany(deltas_list, round=noRound) | |
for i, deltas in enumerate(deltas_list): | |
self._data.addItem(deltas, round=noRound) | |
varIdx = (self._outer << 16) + inner + i | |
self._cache[deltas] = varIdx | |
varIdx = (self._outer << 16) + inner | |
self._cache[deltas_list] = varIdx | |
return varIdx | |
def VarData_addItem(self, deltas, *, round=round): | |
deltas = [round(d) for d in deltas] | |
countUs = self.VarRegionCount | |
countThem = len(deltas) | |
if countUs + 1 == countThem: | |
deltas = list(deltas[1:]) | |
else: | |
assert countUs == countThem, (countUs, countThem) | |
deltas = list(deltas) | |
self.Item.append(deltas) | |
self.ItemCount = len(self.Item) | |
ot.VarData.addItem = VarData_addItem | |
def VarRegion_get_support(self, fvar_axes): | |
return { | |
fvar_axes[i].axisTag: (reg.StartCoord, reg.PeakCoord, reg.EndCoord) | |
for i, reg in enumerate(self.VarRegionAxis) | |
if reg.PeakCoord != 0 | |
} | |
ot.VarRegion.get_support = VarRegion_get_support | |
def VarStore___bool__(self): | |
return bool(self.VarData) | |
ot.VarStore.__bool__ = VarStore___bool__ | |
class VarStoreInstancer(object): | |
def __init__(self, varstore, fvar_axes, location={}): | |
self.fvar_axes = fvar_axes | |
assert varstore is None or varstore.Format == 1 | |
self._varData = varstore.VarData if varstore else [] | |
self._regions = varstore.VarRegionList.Region if varstore else [] | |
self.setLocation(location) | |
def setLocation(self, location): | |
self.location = dict(location) | |
self._clearCaches() | |
def _clearCaches(self): | |
self._scalars = {} | |
def _getScalar(self, regionIdx): | |
scalar = self._scalars.get(regionIdx) | |
if scalar is None: | |
support = self._regions[regionIdx].get_support(self.fvar_axes) | |
scalar = supportScalar(self.location, support) | |
self._scalars[regionIdx] = scalar | |
return scalar | |
def interpolateFromDeltasAndScalars(deltas, scalars): | |
delta = 0.0 | |
for d, s in zip(deltas, scalars): | |
if not s: | |
continue | |
delta += d * s | |
return delta | |
def __getitem__(self, varidx): | |
major, minor = varidx >> 16, varidx & 0xFFFF | |
if varidx == NO_VARIATION_INDEX: | |
return 0.0 | |
varData = self._varData | |
scalars = [self._getScalar(ri) for ri in varData[major].VarRegionIndex] | |
deltas = varData[major].Item[minor] | |
return self.interpolateFromDeltasAndScalars(deltas, scalars) | |
def interpolateFromDeltas(self, varDataIndex, deltas): | |
varData = self._varData | |
scalars = [self._getScalar(ri) for ri in varData[varDataIndex].VarRegionIndex] | |
return self.interpolateFromDeltasAndScalars(deltas, scalars) | |
# | |
# Optimizations | |
# | |
# retainFirstMap - If true, major 0 mappings are retained. Deltas for unused indices are zeroed | |
# advIdxes - Set of major 0 indices for advance deltas to be listed first. Other major 0 indices follow. | |
def VarStore_subset_varidxes( | |
self, | |
varIdxes, | |
optimize=True, | |
retainFirstMap=False, | |
advIdxes=set(), | |
*, | |
VarData="VarData", | |
): | |
# Sort out used varIdxes by major/minor. | |
used = defaultdict(set) | |
for varIdx in varIdxes: | |
if varIdx == NO_VARIATION_INDEX: | |
continue | |
major = varIdx >> 16 | |
minor = varIdx & 0xFFFF | |
used[major].add(minor) | |
del varIdxes | |
# | |
# Subset VarData | |
# | |
varData = getattr(self, VarData) | |
newVarData = [] | |
varDataMap = {NO_VARIATION_INDEX: NO_VARIATION_INDEX} | |
for major, data in enumerate(varData): | |
usedMinors = used.get(major) | |
if usedMinors is None: | |
continue | |
newMajor = len(newVarData) | |
newVarData.append(data) | |
items = data.Item | |
newItems = [] | |
if major == 0 and retainFirstMap: | |
for minor in range(len(items)): | |
newItems.append( | |
items[minor] if minor in usedMinors else [0] * len(items[minor]) | |
) | |
varDataMap[minor] = minor | |
else: | |
if major == 0: | |
minors = sorted(advIdxes) + sorted(usedMinors - advIdxes) | |
else: | |
minors = sorted(usedMinors) | |
for minor in minors: | |
newMinor = len(newItems) | |
newItems.append(items[minor]) | |
varDataMap[(major << 16) + minor] = (newMajor << 16) + newMinor | |
data.Item = newItems | |
data.ItemCount = len(data.Item) | |
if VarData == "VarData": | |
data.calculateNumShorts(optimize=optimize) | |
setattr(self, VarData, newVarData) | |
setattr(self, VarData + "Count", len(newVarData)) | |
self.prune_regions() | |
return varDataMap | |
ot.VarStore.subset_varidxes = VarStore_subset_varidxes | |
def VarStore_prune_regions(self, *, VarData="VarData", VarRegionList="VarRegionList"): | |
"""Remove unused VarRegions.""" | |
# | |
# Subset VarRegionList | |
# | |
# Collect. | |
usedRegions = set() | |
for data in getattr(self, VarData): | |
usedRegions.update(data.VarRegionIndex) | |
# Subset. | |
regionList = getattr(self, VarRegionList) | |
regions = regionList.Region | |
newRegions = [] | |
regionMap = {} | |
for i in sorted(usedRegions): | |
regionMap[i] = len(newRegions) | |
newRegions.append(regions[i]) | |
regionList.Region = newRegions | |
regionList.RegionCount = len(regionList.Region) | |
# Map. | |
for data in getattr(self, VarData): | |
data.VarRegionIndex = [regionMap[i] for i in data.VarRegionIndex] | |
ot.VarStore.prune_regions = VarStore_prune_regions | |
def _visit(self, func): | |
"""Recurse down from self, if type of an object is ot.Device, | |
call func() on it. Works on otData-style classes.""" | |
if type(self) == ot.Device: | |
func(self) | |
elif isinstance(self, list): | |
for that in self: | |
_visit(that, func) | |
elif hasattr(self, "getConverters") and not hasattr(self, "postRead"): | |
for conv in self.getConverters(): | |
that = getattr(self, conv.name, None) | |
if that is not None: | |
_visit(that, func) | |
elif isinstance(self, ot.ValueRecord): | |
for that in self.__dict__.values(): | |
_visit(that, func) | |
def _Device_recordVarIdx(self, s): | |
"""Add VarIdx in this Device table (if any) to the set s.""" | |
if self.DeltaFormat == 0x8000: | |
s.add((self.StartSize << 16) + self.EndSize) | |
def Object_collect_device_varidxes(self, varidxes): | |
adder = partial(_Device_recordVarIdx, s=varidxes) | |
_visit(self, adder) | |
ot.GDEF.collect_device_varidxes = Object_collect_device_varidxes | |
ot.GPOS.collect_device_varidxes = Object_collect_device_varidxes | |
def _Device_mapVarIdx(self, mapping, done): | |
"""Map VarIdx in this Device table (if any) through mapping.""" | |
if id(self) in done: | |
return | |
done.add(id(self)) | |
if self.DeltaFormat == 0x8000: | |
varIdx = mapping[(self.StartSize << 16) + self.EndSize] | |
self.StartSize = varIdx >> 16 | |
self.EndSize = varIdx & 0xFFFF | |
def Object_remap_device_varidxes(self, varidxes_map): | |
mapper = partial(_Device_mapVarIdx, mapping=varidxes_map, done=set()) | |
_visit(self, mapper) | |
ot.GDEF.remap_device_varidxes = Object_remap_device_varidxes | |
ot.GPOS.remap_device_varidxes = Object_remap_device_varidxes | |
class _Encoding(object): | |
def __init__(self, chars): | |
self.chars = chars | |
self.width = bit_count(chars) | |
self.columns = self._columns(chars) | |
self.overhead = self._characteristic_overhead(self.columns) | |
self.items = set() | |
def append(self, row): | |
self.items.add(row) | |
def extend(self, lst): | |
self.items.update(lst) | |
def get_room(self): | |
"""Maximum number of bytes that can be added to characteristic | |
while still being beneficial to merge it into another one.""" | |
count = len(self.items) | |
return max(0, (self.overhead - 1) // count - self.width) | |
room = property(get_room) | |
def get_gain(self): | |
"""Maximum possible byte gain from merging this into another | |
characteristic.""" | |
count = len(self.items) | |
return max(0, self.overhead - count) | |
gain = property(get_gain) | |
def gain_sort_key(self): | |
return self.gain, self.chars | |
def width_sort_key(self): | |
return self.width, self.chars | |
def _characteristic_overhead(columns): | |
"""Returns overhead in bytes of encoding this characteristic | |
as a VarData.""" | |
c = 4 + 6 # 4 bytes for LOffset, 6 bytes for VarData header | |
c += bit_count(columns) * 2 | |
return c | |
def _columns(chars): | |
cols = 0 | |
i = 1 | |
while chars: | |
if chars & 0b1111: | |
cols |= i | |
chars >>= 4 | |
i <<= 1 | |
return cols | |
def gain_from_merging(self, other_encoding): | |
combined_chars = other_encoding.chars | self.chars | |
combined_width = bit_count(combined_chars) | |
combined_columns = self.columns | other_encoding.columns | |
combined_overhead = _Encoding._characteristic_overhead(combined_columns) | |
combined_gain = ( | |
+self.overhead | |
+ other_encoding.overhead | |
- combined_overhead | |
- (combined_width - self.width) * len(self.items) | |
- (combined_width - other_encoding.width) * len(other_encoding.items) | |
) | |
return combined_gain | |
class _EncodingDict(dict): | |
def __missing__(self, chars): | |
r = self[chars] = _Encoding(chars) | |
return r | |
def add_row(self, row): | |
chars = self._row_characteristics(row) | |
self[chars].append(row) | |
def _row_characteristics(row): | |
"""Returns encoding characteristics for a row.""" | |
longWords = False | |
chars = 0 | |
i = 1 | |
for v in row: | |
if v: | |
chars += i | |
if not (-128 <= v <= 127): | |
chars += i * 0b0010 | |
if not (-32768 <= v <= 32767): | |
longWords = True | |
break | |
i <<= 4 | |
if longWords: | |
# Redo; only allow 2byte/4byte encoding | |
chars = 0 | |
i = 1 | |
for v in row: | |
if v: | |
chars += i * 0b0011 | |
if not (-32768 <= v <= 32767): | |
chars += i * 0b1100 | |
i <<= 4 | |
return chars | |
def VarStore_optimize(self, use_NO_VARIATION_INDEX=True, quantization=1): | |
"""Optimize storage. Returns mapping from old VarIdxes to new ones.""" | |
# Overview: | |
# | |
# For each VarData row, we first extend it with zeroes to have | |
# one column per region in VarRegionList. We then group the | |
# rows into _Encoding objects, by their "characteristic" bitmap. | |
# The characteristic bitmap is a binary number representing how | |
# many bytes each column of the data takes up to encode. Each | |
# column is encoded in four bits. For example, if a column has | |
# only values in the range -128..127, it would only have a single | |
# bit set in the characteristic bitmap for that column. If it has | |
# values in the range -32768..32767, it would have two bits set. | |
# The number of ones in the characteristic bitmap is the "width" | |
# of the encoding. | |
# | |
# Each encoding as such has a number of "active" (ie. non-zero) | |
# columns. The overhead of encoding the characteristic bitmap | |
# is 10 bytes, plus 2 bytes per active column. | |
# | |
# When an encoding is merged into another one, if the characteristic | |
# of the old encoding is a subset of the new one, then the overhead | |
# of the old encoding is completely eliminated. However, each row | |
# now would require more bytes to encode, to the tune of one byte | |
# per characteristic bit that is active in the new encoding but not | |
# in the old one. The number of bits that can be added to an encoding | |
# while still beneficial to merge it into another encoding is called | |
# the "room" for that encoding. | |
# | |
# The "gain" of an encodings is the maximum number of bytes we can | |
# save by merging it into another encoding. The "gain" of merging | |
# two encodings is how many bytes we save by doing so. | |
# | |
# High-level algorithm: | |
# | |
# - Each encoding has a minimal way to encode it. However, because | |
# of the overhead of encoding the characteristic bitmap, it may | |
# be beneficial to merge two encodings together, if there is | |
# gain in doing so. As such, we need to search for the best | |
# such successive merges. | |
# | |
# Algorithm: | |
# | |
# - Put all encodings into a "todo" list. | |
# | |
# - Sort todo list by decreasing gain (for stability). | |
# | |
# - Make a priority-queue of the gain from combining each two | |
# encodings in the todo list. The priority queue is sorted by | |
# decreasing gain. Only positive gains are included. | |
# | |
# - While priority queue is not empty: | |
# - Pop the first item from the priority queue, | |
# - Merge the two encodings it represents, | |
# - Remove the two encodings from the todo list, | |
# - Insert positive gains from combining the new encoding with | |
# all existing todo list items into the priority queue, | |
# - If a todo list item with the same characteristic bitmap as | |
# the new encoding exists, remove it from the todo list and | |
# merge it into the new encoding. | |
# - Insert the new encoding into the todo list, | |
# | |
# - Encode all remaining items in the todo list. | |
# | |
# The output is then sorted for stability, in the following way: | |
# - The VarRegionList of the input is kept intact. | |
# - All encodings are sorted before the main algorithm, by | |
# gain_key_sort(), which is a tuple of the following items: | |
# * The gain of the encoding. | |
# * The characteristic bitmap of the encoding, with higher-numbered | |
# columns compared first. | |
# - The VarData is sorted by width_sort_key(), which is a tuple | |
# of the following items: | |
# * The "width" of the encoding. | |
# * The characteristic bitmap of the encoding, with higher-numbered | |
# columns compared first. | |
# - Within each VarData, the items are sorted as vectors of numbers. | |
# | |
# Finally, each VarData is optimized to remove the empty columns and | |
# reorder columns as needed. | |
# TODO | |
# Check that no two VarRegions are the same; if they are, fold them. | |
n = len(self.VarRegionList.Region) # Number of columns | |
zeroes = [0] * n | |
front_mapping = {} # Map from old VarIdxes to full row tuples | |
encodings = _EncodingDict() | |
# Collect all items into a set of full rows (with lots of zeroes.) | |
for major, data in enumerate(self.VarData): | |
regionIndices = data.VarRegionIndex | |
for minor, item in enumerate(data.Item): | |
row = list(zeroes) | |
if quantization == 1: | |
for regionIdx, v in zip(regionIndices, item): | |
row[regionIdx] += v | |
else: | |
for regionIdx, v in zip(regionIndices, item): | |
row[regionIdx] += ( | |
round(v / quantization) * quantization | |
) # TODO https://github.com/fonttools/fonttools/pull/3126#discussion_r1205439785 | |
row = tuple(row) | |
if use_NO_VARIATION_INDEX and not any(row): | |
front_mapping[(major << 16) + minor] = None | |
continue | |
encodings.add_row(row) | |
front_mapping[(major << 16) + minor] = row | |
# Prepare for the main algorithm. | |
todo = sorted(encodings.values(), key=_Encoding.gain_sort_key) | |
del encodings | |
# Repeatedly pick two best encodings to combine, and combine them. | |
heap = [] | |
for i, encoding in enumerate(todo): | |
for j in range(i + 1, len(todo)): | |
other_encoding = todo[j] | |
combining_gain = encoding.gain_from_merging(other_encoding) | |
if combining_gain > 0: | |
heappush(heap, (-combining_gain, i, j)) | |
while heap: | |
_, i, j = heappop(heap) | |
if todo[i] is None or todo[j] is None: | |
continue | |
encoding, other_encoding = todo[i], todo[j] | |
todo[i], todo[j] = None, None | |
# Combine the two encodings | |
combined_chars = other_encoding.chars | encoding.chars | |
combined_encoding = _Encoding(combined_chars) | |
combined_encoding.extend(encoding.items) | |
combined_encoding.extend(other_encoding.items) | |
for k, enc in enumerate(todo): | |
if enc is None: | |
continue | |
# In the unlikely event that the same encoding exists already, | |
# combine it. | |
if enc.chars == combined_chars: | |
combined_encoding.extend(enc.items) | |
todo[k] = None | |
continue | |
combining_gain = combined_encoding.gain_from_merging(enc) | |
if combining_gain > 0: | |
heappush(heap, (-combining_gain, k, len(todo))) | |
todo.append(combined_encoding) | |
encodings = [encoding for encoding in todo if encoding is not None] | |
# Assemble final store. | |
back_mapping = {} # Mapping from full rows to new VarIdxes | |
encodings.sort(key=_Encoding.width_sort_key) | |
self.VarData = [] | |
for encoding in encodings: | |
items = sorted(encoding.items) | |
while items: | |
major = len(self.VarData) | |
data = ot.VarData() | |
self.VarData.append(data) | |
data.VarRegionIndex = range(n) | |
data.VarRegionCount = len(data.VarRegionIndex) | |
# Each major can only encode up to 0xFFFF entries. | |
data.Item, items = items[:0xFFFF], items[0xFFFF:] | |
for minor, item in enumerate(data.Item): | |
back_mapping[item] = (major << 16) + minor | |
# Compile final mapping. | |
varidx_map = {NO_VARIATION_INDEX: NO_VARIATION_INDEX} | |
for k, v in front_mapping.items(): | |
varidx_map[k] = back_mapping[v] if v is not None else NO_VARIATION_INDEX | |
# Recalculate things and go home. | |
self.VarRegionList.RegionCount = len(self.VarRegionList.Region) | |
self.VarDataCount = len(self.VarData) | |
for data in self.VarData: | |
data.ItemCount = len(data.Item) | |
data.optimize() | |
# Remove unused regions. | |
self.prune_regions() | |
return varidx_map | |
ot.VarStore.optimize = VarStore_optimize | |
def main(args=None): | |
"""Optimize a font's GDEF variation store""" | |
from argparse import ArgumentParser | |
from fontTools import configLogger | |
from fontTools.ttLib import TTFont | |
from fontTools.ttLib.tables.otBase import OTTableWriter | |
parser = ArgumentParser(prog="varLib.varStore", description=main.__doc__) | |
parser.add_argument("--quantization", type=int, default=1) | |
parser.add_argument("fontfile") | |
parser.add_argument("outfile", nargs="?") | |
options = parser.parse_args(args) | |
# TODO: allow user to configure logging via command-line options | |
configLogger(level="INFO") | |
quantization = options.quantization | |
fontfile = options.fontfile | |
outfile = options.outfile | |
font = TTFont(fontfile) | |
gdef = font["GDEF"] | |
store = gdef.table.VarStore | |
writer = OTTableWriter() | |
store.compile(writer, font) | |
size = len(writer.getAllData()) | |
print("Before: %7d bytes" % size) | |
varidx_map = store.optimize(quantization=quantization) | |
writer = OTTableWriter() | |
store.compile(writer, font) | |
size = len(writer.getAllData()) | |
print("After: %7d bytes" % size) | |
if outfile is not None: | |
gdef.table.remap_device_varidxes(varidx_map) | |
if "GPOS" in font: | |
font["GPOS"].table.remap_device_varidxes(varidx_map) | |
font.save(outfile) | |
if __name__ == "__main__": | |
import sys | |
if len(sys.argv) > 1: | |
sys.exit(main()) | |
import doctest | |
sys.exit(doctest.testmod().failed) | |