| """Extend the Python codecs module with a few encodings that are used in OpenType (name table) |
| but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details.""" |
| |
| from fontTools.misc.py23 import * |
| import codecs |
| import encodings |
| |
class ExtendCodec(codecs.Codec):
    """Codec that layers a small byte <-> text mapping over a base codec.

    All conversion work is delegated to ``base_encoding``.  Whenever the base
    codec reports an error, the error handler registered by this class (under
    the codec's own name) looks the offending bytes/characters up in
    ``mapping`` and supplies the substitute.  Anything not covered by the
    mapping is passed on to the error handler the caller asked for.
    """

    def __init__(self, name, base_encoding, mapping):
        """Build the codec and register its error handler.

        Args:
            name: Name of this codec; also used as the name under which
                ``self.error`` is registered with ``codecs.register_error``.
            base_encoding: Name of the codec that does the bulk of the work.
            mapping: Dict mapping byte strings to the text they decode to.
                The inverse dict is used for encoding.  May be empty.
        """
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping
        self.reverse = {v: k for k, v in mapping.items()}
        # Longest text value in the mapping; bounds the look-ahead done by
        # error() when encoding.  The "or [0]" keeps an empty mapping from
        # making max() raise ValueError.
        self.max_len = max([len(v) for v in mapping.values()] or [0])
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Convert ``input`` with ``mapper``, honouring the ``errors`` policy.

        Args:
            mapper: ``codecs.encode`` or ``codecs.decode``.
            output_type: Type of the result (``bytes`` or ``str``); called
                with no arguments to create the empty accumulator.
            exc_type: Exception type raised by ``mapper`` for data that
                neither the base codec nor the mapping can handle.
            input: The data to convert.
            errors: Name of the caller's error handler (e.g. ``'strict'``).

        Returns:
            ``(converted, length)`` where ``length`` is ``len(input)`` of the
            original input.

        Raises:
            LookupError: If ``errors`` is not a registered error handler.
            exc_type: If the caller's error handler raises (as ``'strict'``
                does) or returns a replacement that cannot be converted.
        """
        base_error_handler = codecs.lookup_error(errors)
        length = len(input)
        out = output_type()
        while input:
            # First try with self.error as the error handler; it resolves
            # everything covered by the mapping.
            try:
                out += mapper(input, self.base_encoding, errors=self.name)
                break  # All converted
            except exc_type as e:
                # Convert the part before the failure, let the caller's error
                # handler deal with the failure itself, then continue after it.
                out += mapper(input[:e.start], self.base_encoding, self.name)
                replacement, pos = base_error_handler(e)
                if issubclass(exc_type, UnicodeEncodeError) and not isinstance(
                    replacement, bytes
                ):
                    # Encode error handlers such as 'replace', 'ignore' or
                    # 'xmlcharrefreplace' return text; it must be encoded
                    # before it can be appended to the bytes output.
                    replacement = mapper(
                        replacement, self.base_encoding, self.name
                    )
                out += replacement
                input = input[pos:]
        return out, length

    def encode(self, input, errors='strict'):
        """Encode text ``input``; return ``(bytes, number of characters consumed)``."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors='strict'):
        """Decode bytes ``input``; return ``(text, number of bytes consumed)``."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler that substitutes entries from the mapping.

        Registered under ``self.name`` and invoked by the base codec.  Tries
        progressively longer slices starting at ``e.start`` and returns
        ``(replacement, resume_position)`` for the first slice found in the
        mapping (decoding) or in its inverse (encoding).

        Raises:
            The exception ``e`` itself, with ``e.encoding`` set to this
            codec's name, when no slice matches.
        """
        if isinstance(e, UnicodeDecodeError):
            for end in range(e.start + 1, e.end + 1):
                s = e.object[e.start:end]
                if s in self.mapping:
                    return self.mapping[s], end
        elif isinstance(e, UnicodeEncodeError):
            for end in range(e.start + 1, e.start + self.max_len + 1):
                s = e.object[e.start:end]
                if s in self.reverse:
                    return self.reverse[s], end
        e.encoding = self.name
        raise e
| |
| |
# Extended codecs, keyed by normalized codec name.  Each value is a pair
# (base encoding, overrides): the base encoding names the codec that does the
# bulk of the conversion, and overrides maps a single byte to the character
# it stands for in the extended encoding.
_extended_encodings = {
    "x_mac_japanese_ttx": (
        "shift_jis",
        {
            b"\xFC": unichr(0x007C),  # VERTICAL LINE
            b"\x7E": unichr(0x007E),  # TILDE
            b"\x80": unichr(0x005C),  # REVERSE SOLIDUS
            b"\xA0": unichr(0x00A0),  # NO-BREAK SPACE
            b"\xFD": unichr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": unichr(0x2122),  # TRADE MARK SIGN
            b"\xFF": unichr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_trad_chinese_ttx": (
        "big5",
        {
            b"\x80": unichr(0x005C),  # REVERSE SOLIDUS
            b"\xA0": unichr(0x00A0),  # NO-BREAK SPACE
            b"\xFD": unichr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": unichr(0x2122),  # TRADE MARK SIGN
            b"\xFF": unichr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_korean_ttx": (
        "euc_kr",
        {
            b"\x80": unichr(0x00A0),  # NO-BREAK SPACE
            b"\x81": unichr(0x20A9),  # WON SIGN
            b"\x82": unichr(0x2014),  # EM DASH
            b"\x83": unichr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": unichr(0x2122),  # TRADE MARK SIGN
            b"\xFF": unichr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
    "x_mac_simp_chinese_ttx": (
        "gb2312",
        {
            b"\x80": unichr(0x00FC),  # LATIN SMALL LETTER U WITH DIAERESIS
            b"\xA0": unichr(0x00A0),  # NO-BREAK SPACE
            b"\xFD": unichr(0x00A9),  # COPYRIGHT SIGN
            b"\xFE": unichr(0x2122),  # TRADE MARK SIGN
            b"\xFF": unichr(0x2026),  # HORIZONTAL ELLIPSIS
        },
    ),
}
| |
# ExtendCodec instances built so far, keyed by normalized codec name, so that
# each codec (and the error handler it registers) is created only once.
_cache = {}


def search_function(name):
    """Codec search function for the extended ``*_ttx`` encodings.

    Args:
        name: Encoding name as passed in by the codec registry.

    Returns:
        The ``codecs.CodecInfo`` of the matching extended codec, or ``None``
        when ``name`` is not one of the extended encodings or when none of
        its candidate base codecs is available.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None

    if name not in _cache:
        fallback_encoding, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones, mapping
        # them "temporarily" to the same base encoding as us, with a comment
        # suggesting that full implementation will appear some time later.
        # As such, try the Python version of the x_mac_... first, if that is found,
        # use *that* as our base encoding.  This would make our encoding upgrade
        # to the full encoding when and if Python finally implements that.
        # http://bugs.python.org/issue24041
        for candidate in (name[:-4], fallback_encoding):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            _cache[name] = ExtendCodec(name, candidate, mapping)
            break
        else:
            # No usable base codec: report "not found" to the registry rather
            # than failing with KeyError on the cache lookup below.
            return None

    return _cache[name].info


codecs.register(search_function)