tools/stringslint/stringslint.py - platform/frameworks/base - Git at Google

 #!/usr/bin/env python

 # Copyright (C) 2018 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the 'License');
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an 'AS IS' BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """
 Enforces common Android string best-practices.  It ignores lint messages from
 a previous strings file, if provided.

 Usage: stringslint.py strings.xml
 Usage: stringslint.py strings.xml old_strings.xml

 In general:
 * Errors signal issues that must be fixed before submitting, and are only
   used when there are no false-positives.
 * Warnings signal issues that might need to be fixed, but need manual
   inspection due to risk of false-positives.
 * Info signal issues that should be fixed to match best-practices, such
   as providing comments to aid translation.
 """

 import re, sys, codecs
 import lxml.etree as ET

 # Python 2 only: switch the interpreter's default str/unicode conversion codec
 # to UTF-8.  Python 3 has no reload() builtin and already defaults to UTF-8,
 # so the hack is skipped there instead of failing with a NameError.
 if sys.version_info[0] < 3:
     reload(sys)
     sys.setdefaultencoding('utf8')

 # ANSI color indexes; format() uses them as the last digit of an SGR code.
 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

 def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
     """Build an ANSI SGR escape sequence for the requested attributes.

     Args:
         fg: Foreground color index, or None to emit no foreground code.
         bg: Background color index, or None to emit no background code.
         bright: Use the "10x" background range instead of "4x"; it only
             affects the background code.
         bold: Emit the bold code; takes precedence over `dim`.
         dim: Emit the dim code when `bold` is not set.
         reset: Emit only the reset code; every other argument is ignored.

     Returns:
         The escape sequence: ESC, "[", the codes joined by ";", then "m".
     """
     # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
     if reset:
         parts = ["0"]
     else:
         parts = []
         if fg is not None:
             parts.append("3%d" % (fg))
         if bg is not None:
             parts.append(("10%d" if bright else "4%d") % (bg))
         # Exactly one intensity code is always emitted.
         if bold:
             intensity = "1"
         elif dim:
             intensity = "2"
         else:
             intensity = "22"
         parts.append(intensity)
     return "\033[%sm" % (";".join(parts))

 # Findings recorded by warn(), keyed by "<string name>:<hash of message>".
 # lint() rebinds this to a fresh dict at the start of every run.
 warnings = None

 def warn(tag, msg, actual, expected, color=YELLOW):
     """Record a finding for `tag` in the module-level `warnings` dict.

     The entry is keyed by the string's name plus hash(msg), so a later finding
     with the same name and message replaces the earlier one.

     Args:
         tag: Element providing attrib["name"] and sourceline.
         msg: Description of the problem.
         actual: Offending text for an "Actual:" line, or None to omit it.
         expected: Sample text for an "Example:" line, or None to omit it.
         color: Color index used for the highlighted heading.
     """
     global warnings
     name = tag.attrib["name"]
     key = "%s:%d" % (name, hash(msg))

     reset = format(reset=True)
     pieces = ["%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                          tag.sourceline,
                                          name,
                                          reset,
                                          msg)]
     if actual is not None:
         pieces.append("\n\tActual: %s%s%s" % (format(dim=True), actual, reset))
     if expected is not None:
         pieces.append("\n\tExample: %s%s%s" % (format(dim=True), expected, reset))
     warnings[key] = "".join(pieces)


 def error(tag, msg, actual, expected):
     """Record a finding through warn() with a red heading."""
     warn(tag, msg, actual, expected, color=RED)

 def info(tag, msg, actual, expected):
     """Record a finding through warn() with a cyan heading."""
     warn(tag, msg, actual, expected, color=CYAN)

 # Escaping logic borrowed from https://stackoverflow.com/a/24519338
 # Matches one backslash escape sequence per match.  The pattern is compiled
 # with re.VERBOSE, so the layout whitespace and "#" remarks inside it are
 # ignored by the regex engine.
 ESCAPE_SEQUENCE_RE = re.compile(r'''
     ( \\U........      # 8-digit hex escapes
     | \\u....          # 4-digit hex escapes
     | \\x..            # 2-digit hex escapes
     | \\[0-7]{1,3}     # Octal escapes
     | \\N\{[^}]+\}     # Unicode characters by name
     | \\[\\'"abfnrtv]  # Single-character escapes
     )''', re.UNICODE | re.VERBOSE)

 def decode_escapes(s):
     """Turn raw resource text into the sample text used for length checks.

     Steps, applied in this order: a newline plus following whitespace becomes
     one space; backslash escapes matched by ESCAPE_SEQUENCE_RE are decoded;
     "%s"/"%1$s"-style and "^1"-style placeholders become "____"; "<br>" and
     "<br/>" become a newline; remaining lowercase tags are removed.

     Args:
         s: Text to convert.

     Returns:
         The converted text.
     """
     def _unescape(m):
         return codecs.decode(m.group(0), 'unicode-escape')

     collapsed = re.sub(r"\n\s*", " ", s)
     s = ESCAPE_SEQUENCE_RE.sub(_unescape, collapsed)

     # Order matters: <br> has to be rewritten before the generic tag strip.
     rewrites = (
         (r"%(\d+\$)?[a-z]", "____"),
         (r"\^\d+", "____"),
         (r"<br/?>", "\n"),
         (r"</?[a-z]+>", ""),
     )
     for pattern, replacement in rewrites:
         s = re.sub(pattern, replacement, s)
     return s

 def sample_iter(tag):
     """Yield the sample-text pieces of `tag` and its descendants in order.

     An xliff "g" element that has an "example" attribute contributes that
     example in place of its own text; any other node contributes its decoded
     text.  Children are then visited recursively, each followed by its decoded
     tail text.
     """
     is_xliff_g = (not isinstance(tag, ET._Comment)
                   and re.match("{.*xliff.*}g", tag.tag))
     if is_xliff_g and "example" in tag.attrib:
         yield tag.attrib["example"]
     elif tag.text:
         yield decode_escapes(tag.text)

     for node in tag:
         for piece in sample_iter(node):
             yield piece
         tail = node.tail
         if tail:
             yield decode_escapes(tail)

 def lint(path):
     """Lint the <string> resources of the XML file at `path`.

     Rebinds the module-level `warnings` dict to a fresh dict, then walks the
     direct children of the root element.  Each <string> is paired with the most
     recent comment seen since the previous <string> and checked for a
     translator comment, a CHAR LIMIT, typographic glyphs and xliff-wrapped
     substitutions.  Findings are recorded through info()/warn()/error().

     Args:
         path: Path of the strings.xml file to read.

     Returns:
         The `warnings` dict of findings; empty when the file is empty or
         contains only whitespace.
     """
     global warnings
     warnings = {}

     # Read bytes, not text: lxml rejects unicode input that carries an XML
     # encoding declaration, and the parser decodes the document itself.
     with open(path, "rb") as f:
         raw = f.read()
     if not raw.strip():
         return warnings
     root = ET.fromstring(raw)

     last_comment = None
     for child in root:
         # TODO: handle plurals
         if isinstance(child, ET._Comment):
             last_comment = child
             continue
         if child.tag != "string":
             continue

         # We always consume comment
         comment = last_comment
         last_comment = None

         # Prepare string for analysis
         text = "".join(child.itertext())
         sample = "".join(sample_iter(child)).strip().strip("'\"")

         # Validate comment; a string without one gets no further checks.
         if comment is None:
             info(child, "Missing string comment to aid translation",
                  None, None)
             continue
         if "do not translate" in comment.text.lower():
             continue
         if child.attrib.get("translatable", "").lower() == "false":
             continue

         misspelled_attributes = [
             ("translateable", "translatable"),
         ]
         for misspelling, expected in misspelled_attributes:
             if misspelling in child.attrib:
                 error(child, "Misspelled <string> attribute.", misspelling, expected)

         limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
         if limit is None:
             info(child, "Missing CHAR LIMIT to aid translation",
                  repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
         elif re.match(r"\d+", limit.group(1)):
             max_chars = int(limit.group(1))
             if len(sample) > max_chars:
                 warn(child, "Expanded string length is larger than CHAR LIMIT",
                      sample, None)

         # Look for common mistakes/substitutions
         if "'" in text:
             error(child, "Turned quotation mark glyphs are more polished",
                   text, "This doesn\u2019t need to \u2018happen\u2019 today")
         # A string wrapped entirely in double quotes is left alone; any other
         # straight double quote is reported.
         if '"' in text and not (text.startswith('"') and text.endswith('"')):
             error(child, "Turned quotation mark glyphs are more polished",
                   text, "This needs to \u201chappen\u201d today")
         if "..." in text:
             error(child, "Ellipsis glyph is more polished",
                   text, "Loading\u2026")
         if "wi-fi" in text.lower():
             error(child, "Non-breaking glyph is more polished",
                   text, "Wi\u2011Fi")
         if "wifi" in text.lower():
             error(child, "Using non-standard spelling",
                   text, "Wi\u2011Fi")
         if re.search(r"\d-\d", text):
             warn(child, "Ranges should use en dash glyph",
                  text, "You will find this material in chapters 8\u201312")
         if "--" in text:
             warn(child, "Phrases should use em dash glyph",
                  text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
         if ".  " in text:
             warn(child, "Only use single space between sentences",
                  text, "First idea. Second idea.")
         if re.match(r"^[A-Z\s]{5,}$", text):
             warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
                  text, "Refresh data")
         if " phone " in text and "product" not in child.attrib:
             warn(child, "Strings mentioning phones should have variants for tablets",
                  text, None)

         # When more than one substitution, require indexes
         if len(re.findall("%[^%]", text)) > 1 and re.search(r"%[^\d]", text):
             error(child, "Substitutions must be indexed",
                   text, "Add %1$s to %2$s")

         # Require xliff substitutions
         for gc in child.iter():
             badsub = bool(gc.tail and re.search("%[^%]", gc.tail))
             # iter() also yields comment nodes.  Their .tag is not a string
             # (re.match would raise TypeError), so only their tail is checked.
             if not isinstance(gc, ET._Comment):
                 if re.match("{.*xliff.*}g", gc.tag):
                     if "id" not in gc.attrib:
                         error(child, "Substitutions must define id attribute",
                               None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                     if "example" not in gc.attrib:
                         error(child, "Substitutions must define example attribute",
                               None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                 elif gc.text and re.search("%[^%]", gc.text):
                     badsub = True
             if badsub:
                 error(child, "Substitutions must be inside xliff tags",
                       text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")

     return warnings

 # Command-line entry: lint the new file, drop findings that were already
 # present in the optional old file, print what remains and exit non-zero.
 if len(sys.argv) < 2:
     sys.stderr.write("Usage: stringslint.py strings.xml [old_strings.xml]\n")
     sys.exit(2)

 if len(sys.argv) > 2:
     before = lint(sys.argv[2])
 else:
     before = {}
 after = lint(sys.argv[1])

 for b in before:
     after.pop(b, None)

 if after:
     for a in sorted(after):
         # A parenthesised single argument prints the same text whether print
         # is the Python 2 statement or the Python 3 function.
         print(after[a])
         print("")
     sys.exit(1)
	#!/usr/bin/env python

	# Copyright (C) 2018 The Android Open Source Project
	#
	# Licensed under the Apache License, Version 2.0 (the 'License');
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an 'AS IS' BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""
	Enforces common Android string best-practices. It ignores lint messages from
	a previous strings file, if provided.

	Usage: stringslint.py strings.xml
	Usage: stringslint.py strings.xml old_strings.xml

	In general:
	* Errors signal issues that must be fixed before submitting, and are only
	used when there are no false-positives.
	* Warnings signal issues that might need to be fixed, but need manual
	inspection due to risk of false-positives.
	* Info signal issues that should be fixed to match best-practices, such
	as providing comments to aid translation.
	"""

	import re, sys, codecs
	import lxml.etree as ET

	# Python 2 only: switch the interpreter's default str/unicode conversion codec
	# to UTF-8.  Python 3 has no reload() builtin and already defaults to UTF-8,
	# so the hack is skipped there instead of failing with a NameError.
	if sys.version_info[0] < 3:
	    reload(sys)
	    sys.setdefaultencoding('utf8')

	# ANSI color indexes; format() uses them as the last digit of an SGR code.
	BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)

	def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
	    """Build an ANSI SGR escape sequence for the requested attributes.

	    Args:
	        fg: Foreground color index, or None to emit no foreground code.
	        bg: Background color index, or None to emit no background code.
	        bright: Use the "10x" background range instead of "4x"; it only
	            affects the background code.
	        bold: Emit the bold code; takes precedence over `dim`.
	        dim: Emit the dim code when `bold` is not set.
	        reset: Emit only the reset code; every other argument is ignored.

	    Returns:
	        The escape sequence: ESC, "[", the codes joined by ";", then "m".
	    """
	    # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
	    if reset:
	        parts = ["0"]
	    else:
	        parts = []
	        if fg is not None:
	            parts.append("3%d" % (fg))
	        if bg is not None:
	            parts.append(("10%d" if bright else "4%d") % (bg))
	        # Exactly one intensity code is always emitted.
	        if bold:
	            intensity = "1"
	        elif dim:
	            intensity = "2"
	        else:
	            intensity = "22"
	        parts.append(intensity)
	    return "\033[%sm" % (";".join(parts))

	# Findings recorded by warn(), keyed by "<string name>:<hash of message>".
	# lint() rebinds this to a fresh dict at the start of every run.
	warnings = None

	def warn(tag, msg, actual, expected, color=YELLOW):
	    """Record a finding for `tag` in the module-level `warnings` dict.

	    The entry is keyed by the string's name plus hash(msg), so a later finding
	    with the same name and message replaces the earlier one.

	    Args:
	        tag: Element providing attrib["name"] and sourceline.
	        msg: Description of the problem.
	        actual: Offending text for an "Actual:" line, or None to omit it.
	        expected: Sample text for an "Example:" line, or None to omit it.
	        color: Color index used for the highlighted heading.
	    """
	    global warnings
	    name = tag.attrib["name"]
	    key = "%s:%d" % (name, hash(msg))

	    reset = format(reset=True)
	    pieces = ["%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
	                                         tag.sourceline,
	                                         name,
	                                         reset,
	                                         msg)]
	    if actual is not None:
	        pieces.append("\n\tActual: %s%s%s" % (format(dim=True), actual, reset))
	    if expected is not None:
	        pieces.append("\n\tExample: %s%s%s" % (format(dim=True), expected, reset))
	    warnings[key] = "".join(pieces)


	def error(tag, msg, actual, expected):
	    """Record a finding through warn() with a red heading."""
	    warn(tag, msg, actual, expected, RED)

	def info(tag, msg, actual, expected):
	    """Record a finding through warn() with a cyan heading."""
	    warn(tag, msg, actual, expected, CYAN)

	# Escaping logic borrowed from https://stackoverflow.com/a/24519338
	# Matches one backslash escape sequence per match.  The pattern is compiled
	# with re.VERBOSE, so the layout whitespace and "#" remarks inside it are
	# ignored by the regex engine.
	ESCAPE_SEQUENCE_RE = re.compile(r'''
	    ( \\U........      # 8-digit hex escapes
	    | \\u....          # 4-digit hex escapes
	    | \\x..            # 2-digit hex escapes
	    | \\[0-7]{1,3}     # Octal escapes
	    | \\N\{[^}]+\}     # Unicode characters by name
	    | \\[\\'"abfnrtv]  # Single-character escapes
	    )''', re.UNICODE | re.VERBOSE)

	def decode_escapes(s):
	    """Turn raw resource text into the sample text used for length checks.

	    Steps, applied in this order: a newline plus following whitespace becomes
	    one space; backslash escapes matched by ESCAPE_SEQUENCE_RE are decoded;
	    "%s"/"%1$s"-style and "^1"-style placeholders become "____"; "<br>" and
	    "<br/>" become a newline; remaining lowercase tags are removed.

	    Args:
	        s: Text to convert.

	    Returns:
	        The converted text.
	    """
	    def _unescape(m):
	        return codecs.decode(m.group(0), 'unicode-escape')

	    collapsed = re.sub(r"\n\s*", " ", s)
	    s = ESCAPE_SEQUENCE_RE.sub(_unescape, collapsed)

	    # Order matters: <br> has to be rewritten before the generic tag strip.
	    rewrites = (
	        (r"%(\d+\$)?[a-z]", "____"),
	        (r"\^\d+", "____"),
	        (r"<br/?>", "\n"),
	        (r"</?[a-z]+>", ""),
	    )
	    for pattern, replacement in rewrites:
	        s = re.sub(pattern, replacement, s)
	    return s

	def sample_iter(tag):
	    """Yield the sample-text pieces of `tag` and its descendants in order.

	    An xliff "g" element that has an "example" attribute contributes that
	    example in place of its own text; any other node contributes its decoded
	    text.  Children are then visited recursively, each followed by its decoded
	    tail text.
	    """
	    is_xliff_g = (not isinstance(tag, ET._Comment)
	                  and re.match("{.*xliff.*}g", tag.tag))
	    if is_xliff_g and "example" in tag.attrib:
	        yield tag.attrib["example"]
	    elif tag.text:
	        yield decode_escapes(tag.text)

	    for node in tag:
	        for piece in sample_iter(node):
	            yield piece
	        tail = node.tail
	        if tail:
	            yield decode_escapes(tail)

	def lint(path):
	    """Lint the <string> resources of the XML file at `path`.

	    Rebinds the module-level `warnings` dict to a fresh dict, then walks the
	    direct children of the root element.  Each <string> is paired with the most
	    recent comment seen since the previous <string> and checked for a
	    translator comment, a CHAR LIMIT, typographic glyphs and xliff-wrapped
	    substitutions.  Findings are recorded through info()/warn()/error().

	    Args:
	        path: Path of the strings.xml file to read.

	    Returns:
	        The `warnings` dict of findings; empty when the file is empty or
	        contains only whitespace.
	    """
	    global warnings
	    warnings = {}

	    # Read bytes, not text: lxml rejects unicode input that carries an XML
	    # encoding declaration, and the parser decodes the document itself.
	    with open(path, "rb") as f:
	        raw = f.read()
	    if not raw.strip():
	        return warnings
	    root = ET.fromstring(raw)

	    last_comment = None
	    for child in root:
	        # TODO: handle plurals
	        if isinstance(child, ET._Comment):
	            last_comment = child
	            continue
	        if child.tag != "string":
	            continue

	        # We always consume comment
	        comment = last_comment
	        last_comment = None

	        # Prepare string for analysis
	        text = "".join(child.itertext())
	        sample = "".join(sample_iter(child)).strip().strip("'\"")

	        # Validate comment; a string without one gets no further checks.
	        if comment is None:
	            info(child, "Missing string comment to aid translation",
	                 None, None)
	            continue
	        if "do not translate" in comment.text.lower():
	            continue
	        if child.attrib.get("translatable", "").lower() == "false":
	            continue

	        misspelled_attributes = [
	            ("translateable", "translatable"),
	        ]
	        for misspelling, expected in misspelled_attributes:
	            if misspelling in child.attrib:
	                error(child, "Misspelled <string> attribute.", misspelling, expected)

	        limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
	        if limit is None:
	            info(child, "Missing CHAR LIMIT to aid translation",
	                 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
	        elif re.match(r"\d+", limit.group(1)):
	            max_chars = int(limit.group(1))
	            if len(sample) > max_chars:
	                warn(child, "Expanded string length is larger than CHAR LIMIT",
	                     sample, None)

	        # Look for common mistakes/substitutions
	        if "'" in text:
	            error(child, "Turned quotation mark glyphs are more polished",
	                  text, "This doesn\u2019t need to \u2018happen\u2019 today")
	        # A string wrapped entirely in double quotes is left alone; any other
	        # straight double quote is reported.
	        if '"' in text and not (text.startswith('"') and text.endswith('"')):
	            error(child, "Turned quotation mark glyphs are more polished",
	                  text, "This needs to \u201chappen\u201d today")
	        if "..." in text:
	            error(child, "Ellipsis glyph is more polished",
	                  text, "Loading\u2026")
	        if "wi-fi" in text.lower():
	            error(child, "Non-breaking glyph is more polished",
	                  text, "Wi\u2011Fi")
	        if "wifi" in text.lower():
	            error(child, "Using non-standard spelling",
	                  text, "Wi\u2011Fi")
	        if re.search(r"\d-\d", text):
	            warn(child, "Ranges should use en dash glyph",
	                 text, "You will find this material in chapters 8\u201312")
	        if "--" in text:
	            warn(child, "Phrases should use em dash glyph",
	                 text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
	        # Two spaces after the period: a single space would match every
	        # ordinary sentence break.
	        if ".  " in text:
	            warn(child, "Only use single space between sentences",
	                 text, "First idea. Second idea.")
	        if re.match(r"^[A-Z\s]{5,}$", text):
	            warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
	                 text, "Refresh data")
	        if " phone " in text and "product" not in child.attrib:
	            warn(child, "Strings mentioning phones should have variants for tablets",
	                 text, None)

	        # When more than one substitution, require indexes
	        if len(re.findall("%[^%]", text)) > 1 and re.search(r"%[^\d]", text):
	            error(child, "Substitutions must be indexed",
	                  text, "Add %1$s to %2$s")

	        # Require xliff substitutions
	        for gc in child.iter():
	            badsub = bool(gc.tail and re.search("%[^%]", gc.tail))
	            # iter() also yields comment nodes.  Their .tag is not a string
	            # (re.match would raise TypeError), so only their tail is checked.
	            if not isinstance(gc, ET._Comment):
	                if re.match("{.*xliff.*}g", gc.tag):
	                    if "id" not in gc.attrib:
	                        error(child, "Substitutions must define id attribute",
	                              None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
	                    if "example" not in gc.attrib:
	                        error(child, "Substitutions must define example attribute",
	                              None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
	                elif gc.text and re.search("%[^%]", gc.text):
	                    badsub = True
	            if badsub:
	                error(child, "Substitutions must be inside xliff tags",
	                      text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")

	    return warnings

	# Command-line entry: lint the new file, drop findings that were already
	# present in the optional old file, print what remains and exit non-zero.
	if len(sys.argv) < 2:
	    sys.stderr.write("Usage: stringslint.py strings.xml [old_strings.xml]\n")
	    sys.exit(2)

	if len(sys.argv) > 2:
	    before = lint(sys.argv[2])
	else:
	    before = {}
	after = lint(sys.argv[1])

	for b in before:
	    after.pop(b, None)

	if after:
	    for a in sorted(after):
	        # A parenthesised single argument prints the same text whether print
	        # is the Python 2 statement or the Python 3 function.
	        print(after[a])
	        print("")
	    sys.exit(1)