| import logging |
| from collections import defaultdict, namedtuple |
| from functools import reduce |
| from itertools import chain |
| from math import log2 |
| from typing import DefaultDict, Dict, Iterable, List, Sequence, Tuple |
| |
| from fontTools.misc.intTools import bit_count, bit_indices |
| from fontTools.ttLib import TTFont |
| from fontTools.ttLib.tables import otBase, otTables |
| |
| # NOTE: activating this optimization via the environment variable is |
| # experimental and may not be supported once an alternative mechanism |
| # is in place. See: https://github.com/fonttools/fonttools/issues/2349 |
| GPOS_COMPACT_MODE_ENV_KEY = "FONTTOOLS_GPOS_COMPACT_MODE" |
| GPOS_COMPACT_MODE_DEFAULT = "0" |
| |
| log = logging.getLogger("fontTools.otlLib.optimize.gpos") |
| |
| |
| def compact(font: TTFont, mode: str) -> TTFont: |
| # Ideal plan: |
| # 1. Find lookups of Lookup Type 2: Pair Adjustment Positioning Subtable |
| # https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#lookup-type-2-pair-adjustment-positioning-subtable |
| # 2. Extract glyph-glyph kerning and class-kerning from all present subtables |
| # 3. Regroup into different subtable arrangements |
| # 4. Put back into the lookup |
| # |
| # Actual implementation: |
| # 2. Only class kerning is optimized currently |
| # 3. If the input kerning is already in several subtables, the subtables |
| # are not grouped together first; instead each subtable is treated |
| # independently, so currently this step is: |
| # Split existing subtables into more smaller subtables |
| gpos = font["GPOS"] |
| for lookup in gpos.table.LookupList.Lookup: |
| if lookup.LookupType == 2: |
| compact_lookup(font, mode, lookup) |
| elif lookup.LookupType == 9 and lookup.SubTable[0].ExtensionLookupType == 2: |
| compact_ext_lookup(font, mode, lookup) |
| return font |
| |
| |
| def compact_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None: |
| new_subtables = compact_pair_pos(font, mode, lookup.SubTable) |
| lookup.SubTable = new_subtables |
| lookup.SubTableCount = len(new_subtables) |
| |
| |
| def compact_ext_lookup(font: TTFont, mode: str, lookup: otTables.Lookup) -> None: |
| new_subtables = compact_pair_pos( |
| font, mode, [ext_subtable.ExtSubTable for ext_subtable in lookup.SubTable] |
| ) |
| new_ext_subtables = [] |
| for subtable in new_subtables: |
| ext_subtable = otTables.ExtensionPos() |
| ext_subtable.Format = 1 |
| ext_subtable.ExtSubTable = subtable |
| new_ext_subtables.append(ext_subtable) |
| lookup.SubTable = new_ext_subtables |
| lookup.SubTableCount = len(new_ext_subtables) |
| |
| |
| def compact_pair_pos( |
| font: TTFont, mode: str, subtables: Sequence[otTables.PairPos] |
| ) -> Sequence[otTables.PairPos]: |
| new_subtables = [] |
| for subtable in subtables: |
| if subtable.Format == 1: |
| # Not doing anything to Format 1 (yet?) |
| new_subtables.append(subtable) |
| elif subtable.Format == 2: |
| new_subtables.extend(compact_class_pairs(font, mode, subtable)) |
| return new_subtables |
| |
| |
| def compact_class_pairs( |
| font: TTFont, mode: str, subtable: otTables.PairPos |
| ) -> List[otTables.PairPos]: |
| from fontTools.otlLib.builder import buildPairPosClassesSubtable |
| |
| subtables = [] |
| classes1: DefaultDict[int, List[str]] = defaultdict(list) |
| for g in subtable.Coverage.glyphs: |
| classes1[subtable.ClassDef1.classDefs.get(g, 0)].append(g) |
| classes2: DefaultDict[int, List[str]] = defaultdict(list) |
| for g, i in subtable.ClassDef2.classDefs.items(): |
| classes2[i].append(g) |
| all_pairs = {} |
| for i, class1 in enumerate(subtable.Class1Record): |
| for j, class2 in enumerate(class1.Class2Record): |
| if is_really_zero(class2): |
| continue |
| all_pairs[(tuple(sorted(classes1[i])), tuple(sorted(classes2[j])))] = ( |
| getattr(class2, "Value1", None), |
| getattr(class2, "Value2", None), |
| ) |
| |
| if len(mode) == 1 and mode in "123456789": |
| grouped_pairs = cluster_pairs_by_class2_coverage_custom_cost( |
| font, all_pairs, int(mode) |
| ) |
| for pairs in grouped_pairs: |
| subtables.append( |
| buildPairPosClassesSubtable(pairs, font.getReverseGlyphMap()) |
| ) |
| else: |
| raise ValueError(f"Bad {GPOS_COMPACT_MODE_ENV_KEY}={mode}") |
| return subtables |
| |
| |
| def is_really_zero(class2: otTables.Class2Record) -> bool: |
| v1 = getattr(class2, "Value1", None) |
| v2 = getattr(class2, "Value2", None) |
| return (v1 is None or v1.getEffectiveFormat() == 0) and ( |
| v2 is None or v2.getEffectiveFormat() == 0 |
| ) |
| |
| |
| Pairs = Dict[ |
| Tuple[Tuple[str, ...], Tuple[str, ...]], |
| Tuple[otBase.ValueRecord, otBase.ValueRecord], |
| ] |
| |
| # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L935-L958 |
| def _getClassRanges(glyphIDs: Iterable[int]): |
| glyphIDs = sorted(glyphIDs) |
| last = glyphIDs[0] |
| ranges = [[last]] |
| for glyphID in glyphIDs[1:]: |
| if glyphID != last + 1: |
| ranges[-1].append(last) |
| ranges.append([glyphID]) |
| last = glyphID |
| ranges[-1].append(last) |
| return ranges, glyphIDs[0], glyphIDs[-1] |
| |
| |
| # Adapted from https://github.com/fonttools/fonttools/blob/f64f0b42f2d1163b2d85194e0979def539f5dca3/Lib/fontTools/ttLib/tables/otTables.py#L960-L989 |
| def _classDef_bytes( |
| class_data: List[Tuple[List[Tuple[int, int]], int, int]], |
| class_ids: List[int], |
| coverage=False, |
| ): |
| if not class_ids: |
| return 0 |
| first_ranges, min_glyph_id, max_glyph_id = class_data[class_ids[0]] |
| range_count = len(first_ranges) |
| for i in class_ids[1:]: |
| data = class_data[i] |
| range_count += len(data[0]) |
| min_glyph_id = min(min_glyph_id, data[1]) |
| max_glyph_id = max(max_glyph_id, data[2]) |
| glyphCount = max_glyph_id - min_glyph_id + 1 |
| # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-1 |
| format1_bytes = 6 + glyphCount * 2 |
| # https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#class-definition-table-format-2 |
| format2_bytes = 4 + range_count * 6 |
| return min(format1_bytes, format2_bytes) |
| |
| |
| ClusteringContext = namedtuple( |
| "ClusteringContext", |
| [ |
| "lines", |
| "all_class1", |
| "all_class1_data", |
| "all_class2_data", |
| "valueFormat1_bytes", |
| "valueFormat2_bytes", |
| ], |
| ) |
| |
| |
| class Cluster: |
| # TODO(Python 3.7): Turn this into a dataclass |
| # ctx: ClusteringContext |
| # indices: int |
| # Caches |
| # TODO(Python 3.8): use functools.cached_property instead of the |
| # manually cached properties, and remove the cache fields listed below. |
| # _indices: Optional[List[int]] = None |
| # _column_indices: Optional[List[int]] = None |
| # _cost: Optional[int] = None |
| |
| __slots__ = "ctx", "indices_bitmask", "_indices", "_column_indices", "_cost" |
| |
| def __init__(self, ctx: ClusteringContext, indices_bitmask: int): |
| self.ctx = ctx |
| self.indices_bitmask = indices_bitmask |
| self._indices = None |
| self._column_indices = None |
| self._cost = None |
| |
| @property |
| def indices(self): |
| if self._indices is None: |
| self._indices = bit_indices(self.indices_bitmask) |
| return self._indices |
| |
| @property |
| def column_indices(self): |
| if self._column_indices is None: |
| # Indices of columns that have a 1 in at least 1 line |
| # => binary OR all the lines |
| bitmask = reduce(int.__or__, (self.ctx.lines[i] for i in self.indices)) |
| self._column_indices = bit_indices(bitmask) |
| return self._column_indices |
| |
| @property |
| def width(self): |
| # Add 1 because Class2=0 cannot be used but needs to be encoded. |
| return len(self.column_indices) + 1 |
| |
| @property |
| def cost(self): |
| if self._cost is None: |
| self._cost = ( |
| # 2 bytes to store the offset to this subtable in the Lookup table above |
| 2 |
| # Contents of the subtable |
| # From: https://docs.microsoft.com/en-us/typography/opentype/spec/gpos#pair-adjustment-positioning-format-2-class-pair-adjustment |
| # uint16 posFormat Format identifier: format = 2 |
| + 2 |
| # Offset16 coverageOffset Offset to Coverage table, from beginning of PairPos subtable. |
| + 2 |
| + self.coverage_bytes |
| # uint16 valueFormat1 ValueRecord definition — for the first glyph of the pair (may be zero). |
| + 2 |
| # uint16 valueFormat2 ValueRecord definition — for the second glyph of the pair (may be zero). |
| + 2 |
| # Offset16 classDef1Offset Offset to ClassDef table, from beginning of PairPos subtable — for the first glyph of the pair. |
| + 2 |
| + self.classDef1_bytes |
| # Offset16 classDef2Offset Offset to ClassDef table, from beginning of PairPos subtable — for the second glyph of the pair. |
| + 2 |
| + self.classDef2_bytes |
| # uint16 class1Count Number of classes in classDef1 table — includes Class 0. |
| + 2 |
| # uint16 class2Count Number of classes in classDef2 table — includes Class 0. |
| + 2 |
| # Class1Record class1Records[class1Count] Array of Class1 records, ordered by classes in classDef1. |
| + (self.ctx.valueFormat1_bytes + self.ctx.valueFormat2_bytes) |
| * len(self.indices) |
| * self.width |
| ) |
| return self._cost |
| |
| @property |
| def coverage_bytes(self): |
| format1_bytes = ( |
| # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-1 |
| # uint16 coverageFormat Format identifier — format = 1 |
| # uint16 glyphCount Number of glyphs in the glyph array |
| 4 |
| # uint16 glyphArray[glyphCount] Array of glyph IDs — in numerical order |
| + sum(len(self.ctx.all_class1[i]) for i in self.indices) * 2 |
| ) |
| ranges = sorted( |
| chain.from_iterable(self.ctx.all_class1_data[i][0] for i in self.indices) |
| ) |
| merged_range_count = 0 |
| last = None |
| for (start, end) in ranges: |
| if last is not None and start != last + 1: |
| merged_range_count += 1 |
| last = end |
| format2_bytes = ( |
| # From https://docs.microsoft.com/en-us/typography/opentype/spec/chapter2#coverage-format-2 |
| # uint16 coverageFormat Format identifier — format = 2 |
| # uint16 rangeCount Number of RangeRecords |
| 4 |
| # RangeRecord rangeRecords[rangeCount] Array of glyph ranges — ordered by startGlyphID. |
| # uint16 startGlyphID First glyph ID in the range |
| # uint16 endGlyphID Last glyph ID in the range |
| # uint16 startCoverageIndex Coverage Index of first glyph ID in range |
| + merged_range_count * 6 |
| ) |
| return min(format1_bytes, format2_bytes) |
| |
| @property |
| def classDef1_bytes(self): |
| # We can skip encoding one of the Class1 definitions, and use |
| # Class1=0 to represent it instead, because Class1 is gated by the |
| # Coverage definition. Use Class1=0 for the highest byte savings. |
| # Going through all options takes too long, pick the biggest class |
| # = what happens in otlLib.builder.ClassDefBuilder.classes() |
| biggest_index = max(self.indices, key=lambda i: len(self.ctx.all_class1[i])) |
| return _classDef_bytes( |
| self.ctx.all_class1_data, [i for i in self.indices if i != biggest_index] |
| ) |
| |
| @property |
| def classDef2_bytes(self): |
| # All Class2 need to be encoded because we can't use Class2=0 |
| return _classDef_bytes(self.ctx.all_class2_data, self.column_indices) |
| |
| |
| def cluster_pairs_by_class2_coverage_custom_cost( |
| font: TTFont, |
| pairs: Pairs, |
| compression: int = 5, |
| ) -> List[Pairs]: |
| if not pairs: |
| # The subtable was actually empty? |
| return [pairs] |
| |
| # Sorted for reproducibility/determinism |
| all_class1 = sorted(set(pair[0] for pair in pairs)) |
| all_class2 = sorted(set(pair[1] for pair in pairs)) |
| |
| # Use Python's big ints for binary vectors representing each line |
| lines = [ |
| sum( |
| 1 << i if (class1, class2) in pairs else 0 |
| for i, class2 in enumerate(all_class2) |
| ) |
| for class1 in all_class1 |
| ] |
| |
| # Map glyph names to ids and work with ints throughout for ClassDef formats |
| name_to_id = font.getReverseGlyphMap() |
| # Each entry in the arrays below is (range_count, min_glyph_id, max_glyph_id) |
| all_class1_data = [ |
| _getClassRanges(name_to_id[name] for name in cls) for cls in all_class1 |
| ] |
| all_class2_data = [ |
| _getClassRanges(name_to_id[name] for name in cls) for cls in all_class2 |
| ] |
| |
| format1 = 0 |
| format2 = 0 |
| for pair, value in pairs.items(): |
| format1 |= value[0].getEffectiveFormat() if value[0] else 0 |
| format2 |= value[1].getEffectiveFormat() if value[1] else 0 |
| valueFormat1_bytes = bit_count(format1) * 2 |
| valueFormat2_bytes = bit_count(format2) * 2 |
| |
| ctx = ClusteringContext( |
| lines, |
| all_class1, |
| all_class1_data, |
| all_class2_data, |
| valueFormat1_bytes, |
| valueFormat2_bytes, |
| ) |
| |
| cluster_cache: Dict[int, Cluster] = {} |
| |
| def make_cluster(indices: int) -> Cluster: |
| cluster = cluster_cache.get(indices, None) |
| if cluster is not None: |
| return cluster |
| cluster = Cluster(ctx, indices) |
| cluster_cache[indices] = cluster |
| return cluster |
| |
| def merge(cluster: Cluster, other: Cluster) -> Cluster: |
| return make_cluster(cluster.indices_bitmask | other.indices_bitmask) |
| |
| # Agglomerative clustering by hand, checking the cost gain of the new |
| # cluster against the previously separate clusters |
| # Start with 1 cluster per line |
| # cluster = set of lines = new subtable |
| clusters = [make_cluster(1 << i) for i in range(len(lines))] |
| |
| # Cost of 1 cluster with everything |
| # `(1 << len) - 1` gives a bitmask full of 1's of length `len` |
| cost_before_splitting = make_cluster((1 << len(lines)) - 1).cost |
| log.debug(f" len(clusters) = {len(clusters)}") |
| |
| while len(clusters) > 1: |
| lowest_cost_change = None |
| best_cluster_index = None |
| best_other_index = None |
| best_merged = None |
| for i, cluster in enumerate(clusters): |
| for j, other in enumerate(clusters[i + 1 :]): |
| merged = merge(cluster, other) |
| cost_change = merged.cost - cluster.cost - other.cost |
| if lowest_cost_change is None or cost_change < lowest_cost_change: |
| lowest_cost_change = cost_change |
| best_cluster_index = i |
| best_other_index = i + 1 + j |
| best_merged = merged |
| assert lowest_cost_change is not None |
| assert best_cluster_index is not None |
| assert best_other_index is not None |
| assert best_merged is not None |
| |
| # If the best merge we found is still taking down the file size, then |
| # there's no question: we must do it, because it's beneficial in both |
| # ways (lower file size and lower number of subtables). However, if the |
| # best merge we found is not reducing file size anymore, then we need to |
| # look at the other stop criteria = the compression factor. |
| if lowest_cost_change > 0: |
| # Stop critera: check whether we should keep merging. |
| # Compute size reduction brought by splitting |
| cost_after_splitting = sum(c.cost for c in clusters) |
| # size_reduction so that after = before * (1 - size_reduction) |
| # E.g. before = 1000, after = 800, 1 - 800/1000 = 0.2 |
| size_reduction = 1 - cost_after_splitting / cost_before_splitting |
| |
| # Force more merging by taking into account the compression number. |
| # Target behaviour: compression number = 1 to 9, default 5 like gzip |
| # - 1 = accept to add 1 subtable to reduce size by 50% |
| # - 5 = accept to add 5 subtables to reduce size by 50% |
| # See https://github.com/harfbuzz/packtab/blob/master/Lib/packTab/__init__.py#L690-L691 |
| # Given the size reduction we have achieved so far, compute how many |
| # new subtables are acceptable. |
| max_new_subtables = -log2(1 - size_reduction) * compression |
| log.debug( |
| f" len(clusters) = {len(clusters):3d} size_reduction={size_reduction:5.2f} max_new_subtables={max_new_subtables}", |
| ) |
| if compression == 9: |
| # Override level 9 to mean: create any number of subtables |
| max_new_subtables = len(clusters) |
| |
| # If we have managed to take the number of new subtables below the |
| # threshold, then we can stop. |
| if len(clusters) <= max_new_subtables + 1: |
| break |
| |
| # No reason to stop yet, do the merge and move on to the next. |
| del clusters[best_other_index] |
| clusters[best_cluster_index] = best_merged |
| |
| # All clusters are final; turn bitmasks back into the "Pairs" format |
| pairs_by_class1: Dict[Tuple[str, ...], Pairs] = defaultdict(dict) |
| for pair, values in pairs.items(): |
| pairs_by_class1[pair[0]][pair] = values |
| pairs_groups: List[Pairs] = [] |
| for cluster in clusters: |
| pairs_group: Pairs = dict() |
| for i in cluster.indices: |
| class1 = all_class1[i] |
| pairs_group.update(pairs_by_class1[class1]) |
| pairs_groups.append(pairs_group) |
| return pairs_groups |