| from __future__ import annotations |
| |
| import re |
| from functools import lru_cache |
| from itertools import chain, count |
| from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple |
| |
| try: |
| from lxml import etree |
| except ImportError: |
| # lxml is required for subsetting SVG, but we prefer to delay the import error |
| # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table) |
| etree = None |
| |
| from fontTools import ttLib |
| from fontTools.subset.util import _add_method |
| from fontTools.ttLib.tables.S_V_G_ import SVGDocument |
| |
| |
# Names exported by `from <this module> import *`.
__all__ = ["subset_glyphs"]


# Matches element ids of the form "glyph<digits>"; group 1 captures the digits.
GID_RE = re.compile(r"^glyph(\d+)$")

# Prefix -> namespace URI map, so XPath expressions can use the short 'svg:' and
# 'xlink:' prefixes.
NAMESPACES = {
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
}
# Namespace-qualified name of the xlink 'href' attribute, in "{namespace-uri}href"
# form, used as a key into an element's attributes.
XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'
| |
| |
# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    # Compile the XPath expression 'path' once, with the module's namespace
    # prefixes bound; the cache hands back the same compiled evaluator whenever
    # the same expression string is requested again, so it can be reused on
    # multiple elements.
    compiled = etree.XPath(path, namespaces=NAMESPACES)
    return compiled
| |
| |
def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    # Build an {id: element} lookup for every svg-namespaced element that has an
    # 'id' attribute. The expression is absolute ('//'), so elements are selected
    # no matter where they are, including the root element itself:
    # https://github.com/fonttools/fonttools/issues/2548
    # When the same id occurs more than once, the last element found wins.
    find_elements_with_id = xpath("//svg:*[@id]")
    by_id: Dict[str, etree.Element] = {}
    for element in find_elements_with_id(tree):
        by_id[element.attrib["id"]] = element
    return by_id
| |
| |
def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    # Parse the content of a 'style' attribute into a {property: value} dict.
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
    #
    # Declarations are separated by ';'; empty or whitespace-only ones (e.g. after
    # a trailing ';') are skipped. Property names and values are stripped of
    # surrounding whitespace. If a property is repeated, the last one wins.
    #
    # Raises ValueError for a non-empty declaration that has no ':' separator.
    result: Dict[str, str] = {}
    for declaration in style_attr.split(";"):
        if not declaration.strip():
            continue
        # Only the first ':' separates the property name from its value; any
        # further colon belongs to the value (e.g. 'url(http://...)') and must
        # not make the declaration invalid.
        property_name, separator, value = declaration.partition(":")
        if not separator:
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
        result[property_name.strip()] = value.strip()
    return result
| |
| |
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    # Yield all the ids that can be reached via references from this element tree,
    # i.e. from 'tree' itself and from any of its svg descendants.
    # We currently support xlink:href (as used by <use> and gradient templates),
    # and local url(#...) links found in fill or clip-path, given either as
    # attributes or as declarations inside the 'style' attribute.
    # The same id may be yielded more than once. Empty references such as 'url(#)'
    # are skipped, like href_local_target() does for a bare '#'.
    # Raises ValueError (from parse_css_declarations) on a malformed 'style'.
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        # Match a local url() anywhere in the style attribute: requiring ':url(#'
        # would miss declarations written with whitespace after the colon, e.g.
        # 'fill: url(#a)'. Which property the url belongs to is checked below.
        "or contains(@style, 'url(#') "
        "]",
    )
    # the XPath above only selects descendants, so 'tree' is checked explicitly
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            # style declarations override same-named presentation attributes
            attrs = {**dict(attrs), **parse_css_declarations(attrs["style"])}
        for attr in ("fill", "clip-path"):
            value = attrs.get(attr)
            if value is None:
                continue
            if value.startswith("url(#") and value.endswith(")"):
                # strip the leading 'url(#' and the trailing ')'
                ref_id = value[5:-1]
                # ignore an empty fragment; not our job to validate svg
                if ref_id:
                    yield ref_id
| |
| |
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    # Grow 'element_ids' in place until it also contains every id reachable,
    # directly or transitively, through references starting from the initial ids.
    # 'elements' maps the ids present in the document to their elements.
    frontier = element_ids
    while frontier:
        discovered: Set[str] = set()
        for el_id in frontier:
            # a missing id is a dangling reference: skip it, validating the
            # svg is not our job
            if el_id in elements:
                discovered.update(iter_referenced_ids(elements[el_id]))
        # only ids not seen before need visiting in the next round
        discovered.difference_update(element_ids)
        element_ids.update(discovered)
        frontier = discovered
| |
| |
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    # Prune the tree rooted at 'el' in place and report whether 'el' survives.
    # An element is kept when its own id is retained, or when at least one of its
    # children is kept. Otherwise it is detached from its parent (if it has one)
    # and False is returned.
    if el.attrib.get("id") in retained_ids:
        # a retained id keeps the whole subtree untouched: no need to recurse
        return True

    # Visit every child unconditionally: the recursive call removes dropped
    # children as a side effect, so we must not stop at the first kept one.
    has_kept_child = False
    for child in el:
        if subset_elements(child, retained_ids):
            has_kept_child = True
    if has_kept_child:
        return True

    # all the children (if any) have removed themselves by now
    assert len(el) == 0
    parent = el.getparent()
    if parent is not None:
        parent.remove(el)
    return False
| |
| |
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    # Given an {old_gid: new_gid} map, rewrite in place the 'id' attribute of all
    # the elements whose id has the form "glyph{gid}", and return the
    # {old_id: new_id} map of the ids that actually changed.
    elements = group_elements_by_id(svg)
    id_map: Dict[str, str] = {}
    for old_id, element in elements.items():
        match = GID_RE.match(old_id)
        if not match:
            continue
        old_gid = int(match.group(1))
        new_gid = glyph_index_map.get(old_gid)
        if new_gid is None:
            # An old index missing from the map means the element belongs to a
            # glyph excluded from the font's subset. Rename it so that it cannot
            # clash with the new GIDs or with other element ids already present.
            renamed = f".{old_id}"
            suffixes = count(1)
            while renamed in elements:
                renamed = f"{renamed}.{next(suffixes)}"
        elif old_gid == new_gid:
            # the glyph index is unchanged: nothing to rename
            continue
        else:
            renamed = f"glyph{new_gid}"

        id_map[old_id] = renamed
        element.attrib["id"] = renamed

    return id_map
| |
| |
def href_local_target(el: etree.Element) -> Optional[str]:
    # Return the id targeted by the element's xlink:href when it is a local
    # reference ('#' followed by at least one character), with the leading '#'
    # dropped. Return None if the attribute is absent, is not local, or is a
    # bare '#'.
    href = el.attrib.get(XLINK_HREF)
    if href is None:
        return None
    if not href.startswith("#") or len(href) <= 1:
        return None
    return href[1:]
| |
| |
def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    # Repoint, in place, every xlink:href="#glyph..." found on the svg descendants
    # of 'svg' to the new id given by the {old_id: new_id} map. Links whose target
    # is not in the map are left untouched.
    find_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
    for link in find_glyph_links(svg):
        target_id = href_local_target(link)
        # the XPath predicate guarantees a local href longer than '#'
        assert target_id is not None
        if target_id not in id_map:
            continue
        link.attrib[XLINK_HREF] = f"#{id_map[target_id]}"
| |
| |
def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    # Collapse the given integers into sorted, non-overlapping (min, max) pairs,
    # one for each run of consecutive values; both bounds are inclusive.
    # Duplicates are ignored, and nothing is yielded for an empty input.
    ordered = sorted(set(ints))
    if not ordered:
        return
    low = high = ordered[0]
    for value in ordered[1:]:
        if value == high + 1:
            # still inside the current run: extend it
            high = value
            continue
        # gap found: emit the finished run and start a new one
        yield (low, high)
        low = high = value
    yield (low, high)
| |
| |
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    # Subset the SVG documents of this table to the glyphs listed in 's.glyphs',
    # replacing 'self.docList' with the surviving documents.
    # Returns True if at least one document is left, False otherwise.
    # Raises ImportError if lxml could not be imported.
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")

    # glyph names in their order before subsetting
    orig_names: List[str] = s.orig_glyph_order
    # glyph name -> glyph index before subsetting
    name_to_orig_gid: Dict[str, int] = s.reverseOrigGlyphMap
    # glyph index before subsetting -> glyph index after subsetting
    orig_to_new_gid: Dict[int, int] = s.glyph_index_map

    kept_docs: List[SVGDocument] = []
    for doc in self.docList:
        covered_names = {
            orig_names[gid] for gid in range(doc.startGlyphID, doc.endGlyphID + 1)
        }
        wanted_names = covered_names.intersection(s.glyphs)
        if not wanted_names:
            # none of the document's glyphs is in the subset: drop the record
            continue

        # fromstring dislikes an xml encoding declaration when given a str, so
        # feed it bytes; the OT spec mandates utf-8 for SVG documents.
        raw_xml = doc.data.encode("utf-8")
        parser = etree.XMLParser(
            # Lift libxml2's security limits so that very deep trees can be
            # parsed; big fonts such as noto-emoji-picosvg.ttf otherwise fail
            # with:
            # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
            huge_tree=True,
            # Blank text is not meaningful in OT-SVG; discarding it also avoids
            # dangling tail text after removing an element when
            # pretty_print=True
            remove_blank_text=True,
            # Entities are not expected in OT-SVG and may be abused for XXE
            # attacks, so they are not replaced
            resolve_entities=False,
        )
        root = etree.fromstring(raw_xml, parser=parser)

        elements_by_id = group_elements_by_id(root)
        orig_gids = {name_to_orig_gid[name] for name in wanted_names}
        # start from the glyph elements, then pull in whatever they reference
        keep_ids = {f"glyph{gid}" for gid in orig_gids}
        closure_element_ids(elements_by_id, keep_ids)

        if not subset_elements(root, keep_ids):
            # nothing survived, not even the root element
            continue

        if not s.options.retain_gids:
            renamed_ids = remap_glyph_ids(root, orig_to_new_gid)
            update_glyph_href_links(root, renamed_ids)

        serialized = etree.tostring(root, pretty_print=s.options.pretty_svg)
        new_data = serialized.decode("utf-8")

        # one record for each run of consecutive new glyph indices, all sharing
        # the same subsetted document
        subset_gids = (orig_to_new_gid[gid] for gid in orig_gids)
        kept_docs.extend(
            SVGDocument(new_data, first, last, doc.compressed)
            for first, last in ranges(subset_gids)
        )

    self.docList = kept_docs

    return bool(kept_docs)