Lib/fontTools/subset/svg.py - platform/external/fonttools - Git at Google

 from __future__ import annotations

 import re
 from functools import lru_cache
 from itertools import chain, count
 from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple

 try:
     from lxml import etree
 except ImportError:
     # lxml is required for subsetting SVG, but we prefer to delay the import error
     # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
     etree = None

 from fontTools import ttLib
 from fontTools.subset.util import _add_method
 from fontTools.ttLib.tables.S_V_G_ import SVGDocument


 # Only the table-level subsetting hook is exported; it is attached to the
 # 'SVG ' table class through the _add_method decorator applied to it below.
 __all__ = ["subset_glyphs"]


 # Matches element ids of the form "glyph<digits>"; group 1 captures the digits.
 GID_RE = re.compile(r"^glyph(\d+)$")

 # Prefix -> namespace URI map passed to every XPath expression compiled by xpath().
 NAMESPACES = {
     "svg": "http://www.w3.org/2000/svg",
     "xlink": "http://www.w3.org/1999/xlink",
 }
 # Namespace-qualified ("{uri}href") attribute name for xlink:href.
 XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'


 # TODO(anthrotype): Replace with functools.cache once we are 3.9+
 @lru_cache(maxsize=None)
 def xpath(path):
     """Return a compiled, memoized XPath evaluator for the expression *path*.

     The prefixes declared in NAMESPACES ('svg', 'xlink') can be used inside
     the expression. Compilation happens once per distinct expression string;
     later calls with the same string get the cached object back.
     """
     compiled = etree.XPath(path, namespaces=NAMESPACES)
     return compiled


 def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
     """Map each 'id' attribute value found in the document to its element.

     The absolute '//' path selects every SVG-namespaced element that carries
     an 'id', wherever it is, the root element included:
     https://github.com/fonttools/fonttools/issues/2548
     If the same id occurs more than once, the last match wins.
     """
     find_elements_with_id = xpath("//svg:*[@id]")
     by_id: Dict[str, etree.Element] = {}
     for element in find_elements_with_id(tree):
         by_id[element.attrib["id"]] = element
     return by_id


 def parse_css_declarations(style_attr: str) -> Dict[str, str]:
     """Parse the contents of a 'style' attribute into a {property: value} dict.

     https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
     https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations

     Declarations are separated by ';'. Each one is split at its *first* ':'
     only, so a value that itself contains a colon (e.g. an absolute URL) is
     accepted rather than rejected as malformed. Property names and values are
     stripped of surrounding whitespace. Blank declarations, such as the one
     following a trailing ';', are skipped. When a property is repeated, the
     last occurrence wins.

     Raises:
         ValueError: if a non-blank declaration contains no ':' separator.
     """
     result = {}
     for declaration in style_attr.split(";"):
         property_name, separator, value = declaration.partition(":")
         if separator:
             result[property_name.strip()] = value.strip()
         elif declaration.strip():
             raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
     return result


 def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
     """Yield every element id that *tree* or one of its descendants refers to.

     Supported kinds of reference are xlink:href="#..." (as used by <use> and
     gradient templates) and local url(#...) links in the 'fill' or
     'clip-path' properties, given either as plain attributes or as
     declarations inside the 'style' attribute. The same id may be yielded
     more than once.

     Raises:
         ValueError: propagated from parse_css_declarations() when a 'style'
             attribute contains a malformed declaration.
     """
     # TODO(anthrotype): Check we aren't missing other supported kinds of reference
     find_svg_elements_with_references = xpath(
         ".//svg:*[ "
         "starts-with(@xlink:href, '#') "
         "or starts-with(@fill, 'url(#') "
         "or starts-with(@clip-path, 'url(#') "
         # Deliberately no ':' in front of 'url(#': CSS allows whitespace after
         # the colon ("fill: url(#a)"). This is only a coarse pre-filter; the
         # exact property is checked below once the style has been parsed.
         "or contains(@style, 'url(#') "
         "]",
     )
     # the XPath selects descendants only, so the root is examined explicitly
     for el in chain([tree], find_svg_elements_with_references(tree)):
         ref_id = href_local_target(el)
         if ref_id is not None:
             yield ref_id

         attrs = el.attrib
         if "style" in attrs:
             # style declarations override same-named plain attributes
             attrs = {**dict(attrs), **parse_css_declarations(attrs["style"])}
         for attr in ("fill", "clip-path"):
             value = attrs.get(attr, "")
             if value.startswith("url(#") and value.endswith(")"):
                 ref_id = value[5:-1]
                 # Skip an empty 'url(#)' instead of asserting: as with
                 # dangling references, validating the svg is not our job.
                 if ref_id:
                     yield ref_id


 def closure_element_ids(
     elements: Dict[str, etree.Element], element_ids: Set[str]
 ) -> None:
     """Grow *element_ids* in place to its transitive closure over references.

     Starting from the ids initially in the set, repeatedly add every id that
     iter_referenced_ids() reports for an already included element, until a
     round turns up nothing new. Ids with no entry in *elements* stay in the
     set but are not followed.
     """
     frontier = element_ids
     while frontier:
         discovered: Set[str] = {
             ref_id
             for el_id in frontier
             # ignore dangling reference; not our job to validate svg
             if el_id in elements
             for ref_id in iter_referenced_ids(elements[el_id])
         }
         # only ids not seen before need exploring in the next round
         discovered -= element_ids
         element_ids |= discovered
         frontier = discovered


 def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
     """Prune *el* in place; return True if it is kept, False if it was dropped.

     An element whose id is in *retained_ids* is kept together with its whole
     subtree. Any other element is kept only if at least one of its children
     is kept; otherwise it is removed from its parent (when it has one).
     """
     if el.attrib.get("id") in retained_ids:
         # retained by id: don't descend, the entire subtree stays
         return True
     # Every child has to be visited because the recursive call drops rejected
     # children as a side effect; hence a full loop rather than any() over a
     # generator, which would stop at the first kept child.
     has_retained_child = False
     for child in el:
         if subset_elements(child, retained_ids):
             has_retained_child = True
     if has_retained_child:
         return True
     # every child (if there were any) has been rejected and removed itself
     assert len(el) == 0
     parent = el.getparent()
     if parent is not None:
         parent.remove(el)
     return False


 def remap_glyph_ids(
     svg: etree.Element, glyph_index_map: Dict[int, int]
 ) -> Dict[str, str]:
     """Rename id="glyph{gid}" elements following an {old_gid: new_gid} map.

     Elements whose id does not match GID_RE are left alone, as are glyph
     elements whose index does not change. The 'id' attributes are updated in
     place.

     Returns:
         {old_id: new_id} for every element that was actually renamed.
     """
     elements = group_elements_by_id(svg)
     id_map = {}
     for old_id, element in elements.items():
         match = GID_RE.match(old_id)
         if match is None:
             continue
         old_gid = int(match.group(1))
         new_gid = glyph_index_map.get(old_gid)
         if new_gid == old_gid:
             # index unchanged, nothing to rename
             continue
         if new_gid is None:
             # The old index is missing from the map: the element corresponds
             # to a glyph that was excluded from the font's subset. Rename it
             # to avoid clashes with the new GIDs or other element ids.
             renamed = f".{old_id}"
             suffix = count(1)
             while renamed in elements:
                 renamed = f"{renamed}.{next(suffix)}"
         else:
             renamed = f"glyph{new_gid}"

         id_map[old_id] = renamed
         element.attrib["id"] = renamed

     return id_map


 def href_local_target(el: etree.Element) -> Optional[str]:
     """Return the id targeted by the element's local xlink:href, or None.

     Only values of the form "#<non-empty id>" count as local references; a
     missing attribute, a bare "#" or any other value yields None.
     """
     href = el.attrib.get(XLINK_HREF, "")
     if len(href) > 1 and href.startswith("#"):
         # drop the leading #
         return href[1:]
     return None


 def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
     """Retarget xlink:href="#glyph..." attributes below *svg* in place.

     *id_map* is an {old_id: new_id} mapping; links whose target is not a key
     of the mapping are left untouched.
     """
     find_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
     for link in find_glyph_links(svg):
         target_id = href_local_target(link)
         # the XPath guarantees a '#glyph...' href, hence a non-empty local id
         assert target_id is not None
         if target_id not in id_map:
             continue
         link.attrib[XLINK_HREF] = f"#{id_map[target_id]}"


 def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
     """Yield sorted, non-overlapping (min, max) runs of consecutive integers.

     Duplicates in *ints* are ignored and both bounds are inclusive. Nothing
     is yielded for an empty input.
     """
     unique_sorted = sorted(set(ints))
     if not unique_sorted:
         return
     first = last = unique_sorted[0]
     for value in unique_sorted[1:]:
         if value == last + 1:
             # still inside the current run
             last = value
             continue
         yield (first, last)
         first = last = value
     yield (first, last)


 @_add_method(ttLib.getTableClass("SVG "))
 def subset_glyphs(self, s) -> bool:
     """Reduce this table's SVG documents to the glyphs kept by subsetter *s*.

     Each document whose glyph range intersects ``s.glyphs`` is parsed, pruned
     to the elements needed by the retained glyphs (following references),
     renumbered to the new glyph ids unless ``s.options.retain_gids`` is set,
     and serialized again. One SVGDocument record is emitted per run of
     consecutive new glyph ids, and ``self.docList`` is replaced with the
     resulting records.

     Returns:
         True if at least one document is left, False otherwise.

     Raises:
         ImportError: if lxml could not be imported.
     """
     if etree is None:
         raise ImportError("No module named 'lxml', required to subset SVG")

     # glyph names (before subsetting)
     glyph_order: List[str] = s.orig_glyph_order
     # map from glyph names to original glyph indices
     rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
     # map from original to new glyph indices (after subsetting)
     glyph_index_map: Dict[int, int] = s.glyph_index_map

     subset_docs: List[SVGDocument] = []
     for doc in self.docList:
         doc_glyph_names = {
             glyph_order[gid] for gid in range(doc.startGlyphID, doc.endGlyphID + 1)
         }
         retained_names = doc_glyph_names.intersection(s.glyphs)
         if not retained_names:
             # no intersection: we can drop the whole record
             continue

         # encode because fromstring dislikes xml encoding decl if input is str.
         # SVG xml encoding must be utf-8 as per OT spec.
         xml_bytes = doc.data.encode("utf-8")
         parser = etree.XMLParser(
             # Disable libxml2 security restrictions to support very deep trees.
             # Without this we would get an error like this:
             # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
             # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
             huge_tree=True,
             # ignore blank text as it's not meaningful in OT-SVG; it also prevents
             # dangling tail text after removing an element when pretty_print=True
             remove_blank_text=True,
             # don't replace entities; we don't expect any in OT-SVG and they may
             # be abused for XXE attacks
             resolve_entities=False,
         )
         svg = etree.fromstring(xml_bytes, parser=parser)

         elements = group_elements_by_id(svg)
         old_gids = {rev_orig_glyph_map[name] for name in retained_names}
         element_ids = {f"glyph{gid}" for gid in old_gids}
         closure_element_ids(elements, element_ids)

         if not subset_elements(svg, element_ids):
             # nothing left in this document after pruning
             continue

         if not s.options.retain_gids:
             id_map = remap_glyph_ids(svg, glyph_index_map)
             update_glyph_href_links(svg, id_map)

         serialized = etree.tostring(svg, pretty_print=s.options.pretty_svg)
         new_doc = serialized.decode("utf-8")

         # the same document text is shared by every run of consecutive new gids
         new_gid_runs = ranges(glyph_index_map[gid] for gid in old_gids)
         subset_docs.extend(
             SVGDocument(new_doc, start, end, doc.compressed)
             for start, end in new_gid_runs
         )

     self.docList = subset_docs

     return bool(self.docList)
	from __future__ import annotations

	import re
	from functools import lru_cache
	from itertools import chain, count
	from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple

	try:
	from lxml import etree
	except ImportError:
	# lxml is required for subsetting SVG, but we prefer to delay the import error
	# until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
	etree = None

	from fontTools import ttLib
	from fontTools.subset.util import _add_method
	from fontTools.ttLib.tables.S_V_G_ import SVGDocument


	# Only the table-level subsetting hook is exported; it is attached to the
	# 'SVG ' table class through the _add_method decorator applied to it below.
	__all__ = ["subset_glyphs"]


	# Matches element ids of the form "glyph<digits>"; group 1 captures the digits.
	GID_RE = re.compile(r"^glyph(\d+)$")

	# Prefix -> namespace URI map passed to every XPath expression compiled by xpath().
	NAMESPACES = {
	"svg": "http://www.w3.org/2000/svg",
	"xlink": "http://www.w3.org/1999/xlink",
	}
	# Namespace-qualified ("{uri}href") attribute name for xlink:href.
	XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'


	# TODO(anthrotype): Replace with functools.cache once we are 3.9+
	@lru_cache(maxsize=None)
	def xpath(path):
	    """Return a compiled, memoized XPath evaluator for the expression *path*.

	    The prefixes declared in NAMESPACES ('svg', 'xlink') can be used inside
	    the expression. Compilation happens once per distinct expression string;
	    later calls with the same string get the cached object back.
	    """
	    return etree.XPath(path, namespaces=NAMESPACES)


	def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
	    """Map each 'id' attribute value found in the document to its element.

	    The absolute '//' path selects every SVG-namespaced element that carries
	    an 'id', wherever it is, the root element included:
	    https://github.com/fonttools/fonttools/issues/2548
	    If the same id occurs more than once, the last match wins.
	    """
	    return {el.attrib["id"]: el for el in xpath("//svg:*[@id]")(tree)}


	def parse_css_declarations(style_attr: str) -> Dict[str, str]:
	    """Parse the contents of a 'style' attribute into a {property: value} dict.

	    https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
	    https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations

	    Declarations are separated by ';'. Each one is split at its *first* ':'
	    only, so a value that itself contains a colon (e.g. an absolute URL) is
	    accepted rather than rejected as malformed. Property names and values are
	    stripped of surrounding whitespace. Blank declarations, such as the one
	    following a trailing ';', are skipped. When a property is repeated, the
	    last occurrence wins.

	    Raises:
	        ValueError: if a non-blank declaration contains no ':' separator.
	    """
	    result = {}
	    for declaration in style_attr.split(";"):
	        property_name, separator, value = declaration.partition(":")
	        if separator:
	            result[property_name.strip()] = value.strip()
	        elif declaration.strip():
	            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
	    return result


	def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
	    """Yield every element id that *tree* or one of its descendants refers to.

	    Supported kinds of reference are xlink:href="#..." (as used by <use> and
	    gradient templates) and local url(#...) links in the 'fill' or
	    'clip-path' properties, given either as plain attributes or as
	    declarations inside the 'style' attribute. The same id may be yielded
	    more than once.

	    Raises:
	        ValueError: propagated from parse_css_declarations() when a 'style'
	            attribute contains a malformed declaration.
	    """
	    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
	    find_svg_elements_with_references = xpath(
	        ".//svg:*[ "
	        "starts-with(@xlink:href, '#') "
	        "or starts-with(@fill, 'url(#') "
	        "or starts-with(@clip-path, 'url(#') "
	        # Deliberately no ':' in front of 'url(#': CSS allows whitespace after
	        # the colon ("fill: url(#a)"). This is only a coarse pre-filter; the
	        # exact property is checked below once the style has been parsed.
	        "or contains(@style, 'url(#') "
	        "]",
	    )
	    # the XPath selects descendants only, so the root is examined explicitly
	    for el in chain([tree], find_svg_elements_with_references(tree)):
	        ref_id = href_local_target(el)
	        if ref_id is not None:
	            yield ref_id

	        attrs = el.attrib
	        if "style" in attrs:
	            # Merge the two mappings with ** unpacking (a plain
	            # {a, b} would build a set of dicts and raise TypeError);
	            # style declarations override same-named plain attributes.
	            attrs = {**dict(attrs), **parse_css_declarations(attrs["style"])}
	        for attr in ("fill", "clip-path"):
	            value = attrs.get(attr, "")
	            if value.startswith("url(#") and value.endswith(")"):
	                ref_id = value[5:-1]
	                # Skip an empty 'url(#)' instead of asserting: as with
	                # dangling references, validating the svg is not our job.
	                if ref_id:
	                    yield ref_id


	def closure_element_ids(
	    elements: Dict[str, etree.Element], element_ids: Set[str]
	) -> None:
	    """Grow *element_ids* in place to its transitive closure over references.

	    Starting from the ids initially in the set, repeatedly add every id that
	    iter_referenced_ids() reports for an already included element, until a
	    round turns up nothing new. Ids with no entry in *elements* stay in the
	    set but are not followed.
	    """
	    unvisited = element_ids
	    while unvisited:
	        referenced: Set[str] = set()
	        for el_id in unvisited:
	            if el_id not in elements:
	                # ignore dangling reference; not our job to validate svg
	                continue
	            referenced.update(iter_referenced_ids(elements[el_id]))
	        # only ids not seen before need exploring in the next round
	        referenced -= element_ids
	        element_ids.update(referenced)
	        unvisited = referenced


	def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
	    """Prune *el* in place; return True if it is kept, False if it was dropped.

	    An element whose id is in *retained_ids* is kept together with its whole
	    subtree. Any other element is kept only if at least one of its children
	    is kept; otherwise it is removed from its parent (when it has one).
	    """
	    if el.attrib.get("id") in retained_ids:
	        # if id is in the set, don't recurse; keep whole subtree
	        return True
	    # recursively subset all the children; we use a list comprehension instead
	    # of a parentheses-less generator expression because we don't want any() to
	    # short-circuit, as our function has a side effect of dropping empty elements.
	    if any([subset_elements(e, retained_ids) for e in el]):
	        return True
	    # every child (if there were any) has been rejected and removed itself
	    assert len(el) == 0
	    parent = el.getparent()
	    if parent is not None:
	        parent.remove(el)
	    return False


	def remap_glyph_ids(
	    svg: etree.Element, glyph_index_map: Dict[int, int]
	) -> Dict[str, str]:
	    """Rename id="glyph{gid}" elements following an {old_gid: new_gid} map.

	    Elements whose id does not match GID_RE are left alone, as are glyph
	    elements whose index does not change. The 'id' attributes are updated in
	    place.

	    Returns:
	        {old_id: new_id} for every element that was actually renamed.
	    """
	    elements = group_elements_by_id(svg)
	    id_map = {}
	    for el_id, el in elements.items():
	        m = GID_RE.match(el_id)
	        if not m:
	            continue
	        old_index = int(m.group(1))
	        new_index = glyph_index_map.get(old_index)
	        if new_index is not None:
	            if old_index == new_index:
	                continue
	            new_id = f"glyph{new_index}"
	        else:
	            # If the old index is missing, the element corresponds to a glyph
	            # that was excluded from the font's subset.
	            # We rename it to avoid clashes with the new GIDs or other element ids.
	            new_id = f".{el_id}"
	            n = count(1)
	            while new_id in elements:
	                new_id = f"{new_id}.{next(n)}"

	        id_map[el_id] = new_id
	        el.attrib["id"] = new_id

	    return id_map


	def href_local_target(el: etree.Element) -> Optional[str]:
	    """Return the id targeted by the element's local xlink:href, or None.

	    Only values of the form "#<non-empty id>" count as local references; a
	    missing attribute, a bare "#" or any other value yields None.
	    """
	    if XLINK_HREF in el.attrib:
	        href = el.attrib[XLINK_HREF]
	        if href.startswith("#") and len(href) > 1:
	            return href[1:]  # drop the leading #
	    return None


	def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
	    """Retarget xlink:href="#glyph..." attributes below *svg* in place.

	    *id_map* is an {old_id: new_id} mapping; links whose target is not a key
	    of the mapping are left untouched.
	    """
	    for el in xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")(svg):
	        old_id = href_local_target(el)
	        # the XPath guarantees a '#glyph...' href, hence a non-empty local id
	        assert old_id is not None
	        if old_id in id_map:
	            new_id = id_map[old_id]
	            el.attrib[XLINK_HREF] = f"#{new_id}"


	def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
	    """Yield sorted, non-overlapping (min, max) runs of consecutive integers.

	    Duplicates in *ints* are ignored and both bounds are inclusive. Nothing
	    is yielded for an empty input.
	    """
	    sorted_ints = iter(sorted(set(ints)))
	    try:
	        start = end = next(sorted_ints)
	    except StopIteration:
	        return
	    for v in sorted_ints:
	        if v - 1 == end:
	            # still inside the current run
	            end = v
	        else:
	            yield (start, end)
	            start = end = v
	    yield (start, end)


	@_add_method(ttLib.getTableClass("SVG "))
	def subset_glyphs(self, s) -> bool:
	    """Reduce this table's SVG documents to the glyphs kept by subsetter *s*.

	    Each document whose glyph range intersects ``s.glyphs`` is parsed, pruned
	    to the elements needed by the retained glyphs (following references),
	    renumbered to the new glyph ids unless ``s.options.retain_gids`` is set,
	    and serialized again. One SVGDocument record is emitted per run of
	    consecutive new glyph ids, and ``self.docList`` is replaced with the
	    resulting records.

	    Returns:
	        True if at least one document is left, False otherwise.

	    Raises:
	        ImportError: if lxml could not be imported.
	    """
	    if etree is None:
	        raise ImportError("No module named 'lxml', required to subset SVG")

	    # glyph names (before subsetting)
	    glyph_order: List[str] = s.orig_glyph_order
	    # map from glyph names to original glyph indices
	    rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
	    # map from original to new glyph indices (after subsetting)
	    glyph_index_map: Dict[int, int] = s.glyph_index_map

	    new_docs: List[SVGDocument] = []
	    for doc in self.docList:
	        glyphs = {
	            glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1)
	        }.intersection(s.glyphs)
	        if not glyphs:
	            # no intersection: we can drop the whole record
	            continue

	        svg = etree.fromstring(
	            # encode because fromstring dislikes xml encoding decl if input is str.
	            # SVG xml encoding must be utf-8 as per OT spec.
	            doc.data.encode("utf-8"),
	            parser=etree.XMLParser(
	                # Disable libxml2 security restrictions to support very deep trees.
	                # Without this we would get an error like this:
	                # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
	                # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
	                huge_tree=True,
	                # ignore blank text as it's not meaningful in OT-SVG; it also prevents
	                # dangling tail text after removing an element when pretty_print=True
	                remove_blank_text=True,
	                # don't replace entities; we don't expect any in OT-SVG and they may
	                # be abused for XXE attacks
	                resolve_entities=False,
	            ),
	        )

	        elements = group_elements_by_id(svg)
	        gids = {rev_orig_glyph_map[g] for g in glyphs}
	        element_ids = {f"glyph{i}" for i in gids}
	        closure_element_ids(elements, element_ids)

	        if not subset_elements(svg, element_ids):
	            # nothing left in this document after pruning
	            continue

	        if not s.options.retain_gids:
	            id_map = remap_glyph_ids(svg, glyph_index_map)
	            update_glyph_href_links(svg, id_map)

	        new_doc = etree.tostring(svg, pretty_print=s.options.pretty_svg).decode(
	            "utf-8"
	        )

	        # the same document text is shared by every run of consecutive new gids
	        new_gids = (glyph_index_map[i] for i in gids)
	        for start, end in ranges(new_gids):
	            new_docs.append(SVGDocument(new_doc, start, end, doc.compressed))

	    self.docList = new_docs

	    return bool(self.docList)