| #!/usr/bin/env python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| '''Utilities used by GRIT. |
| ''' |
| |
| import codecs |
| import htmlentitydefs |
| import os |
| import re |
| import shutil |
| import sys |
| import tempfile |
| import time |
| import types |
| from xml.sax import saxutils |
| |
| from grit import lazy_re |
| |
# Absolute path of the directory one level above the one holding this module.
_root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))


# Unique constants for use by ReadFile().
BINARY, RAW_TEXT = 0, 1


# Unique constants representing data pack encodings.  The leading slot (0) is
# deliberately not given a public name.
_, UTF8, UTF16 = 0, 1, 2
| |
| |
def Encode(message, encoding):
  '''Converts |message| to the byte representation named by |encoding|.

  Args:
    message: a python unicode string.
    encoding: UTF8 or UTF16; any other value leaves |message| untouched.

  Returns:
    The encoded bytes for UTF8/UTF16, otherwise |message| itself.
  '''
  if encoding == UTF16:
    # The 'utf16' codec emits a 2-byte BOM first; data packs do not want it.
    with_bom = message.encode('utf16')
    return with_bom[2:]
  elif encoding == UTF8:
    return message.encode('utf8')
  # Anything else is treated as binary data and passed straight through.
  return message
| |
| |
# Matches every kind of line break: Windows (\r\n), Unix (\n) and a lone
# carriage return (\r).  The two-character form is listed first so that it is
# consumed as a single break.
LINEBREAKS = re.compile('\r\n|\n|\r')
| |
def MakeRelativePath(base_path, path_to_make_relative):
  """Returns a path that leads from base_path to path_to_make_relative.

  Put differently, os.path.join(base_path,
      MakeRelativePath(base_path, path_to_make_relative))
  refers to the same location as path_to_make_relative.

  If the two paths share nothing at all, path_to_make_relative is returned
  unchanged.

  Args:
    base_path: the root path
    path_to_make_relative: an absolute path that is on the same drive
      as base_path
  """

  def _StripPrefix(prefix_path, path_with_prefix):
    """Returns the part of path_with_prefix below prefix_path, normalized and
    without leading or trailing separators ('' when nothing is left).

    Args:
      prefix_path: the base path
      path_with_prefix: a path that starts with prefix_path
    """
    assert path_with_prefix.startswith(prefix_path)
    remainder = path_with_prefix[len(prefix_path):].strip(os.path.sep)
    remainder = os.path.normpath(remainder)
    # normpath('') yields '.', which stands for "nothing left" here.
    return '' if remainder == '.' else remainder

  def _CommonDirectory(*paths):
    """Returns the longest directory prefix shared by all of the paths.

    Args:
      paths: the paths to compare (at least one should be a directory)
    """
    # commonprefix() compares character by character, so its result may stop
    # in the middle of a path component.
    common = os.path.commonprefix(paths)
    if common.endswith(os.path.sep):
      # Already ends on a directory boundary.
      return common

    # Every path has to be inspected: for "c:\a" and "c:\abc" the raw prefix
    # "c:\a" looks like a directory for the first path but clearly is not one
    # for the second.
    for candidate in paths:
      assert len(candidate) >= len(common)
      # The prefix is fine for this path if it is the whole path or if the
      # path continues with a separator right after it.
      if candidate != common and candidate[len(common)] != os.path.sep:
        # Cut back to the last separator, keeping it.  rfind() returning -1
        # conveniently turns the slice into the empty string.
        common = common[:common.rfind(os.path.sep) + 1]
        # The shortened prefix is a directory boundary shared by all paths,
        # so there is nothing left to check.
        break
    return common

  common_dir = _CommonDirectory(base_path, path_to_make_relative)
  if not common_dir:
    # Nothing in common: the original path is the best available answer and
    # keeps the historical absolute-path behavior.
    return path_to_make_relative

  # One '..' step is needed for every component of base_path that lies below
  # the common directory (none when the remainder is '').
  base_remainder = _StripPrefix(common_dir, base_path)
  depth = sum(1 for piece in base_remainder.split(os.path.sep) if piece)
  climb = ('..' + os.path.sep) * depth

  # Then descend from the common directory to the target.
  return climb + _StripPrefix(common_dir, path_to_make_relative)
| |
| |
# Regular expression matching any known system identifier; rebuilt by every
# call to SetupSystemIdentifiers().
SYSTEM_IDENTIFIERS = None

# Every identifier registered so far through SetupSystemIdentifiers().
KNOWN_SYSTEM_IDENTIFIERS = set()
| |
def SetupSystemIdentifiers(ids):
  '''Registers more known system identifiers and rebuilds the regexp that
  matches them.

  May be called repeatedly; identifiers accumulate across calls.

  Args:
    ids: an iterable of strings
  '''
  global SYSTEM_IDENTIFIERS
  KNOWN_SYSTEM_IDENTIFIERS.update(ids)
  # One whole-word alternative per identifier.  The blanks around '|' are
  # ignored by the regexp engine because of re.VERBOSE.
  alternatives = [r'\b%s\b' % known_id
                  for known_id in KNOWN_SYSTEM_IDENTIFIERS]
  pattern = ' | '.join(alternatives)
  SYSTEM_IDENTIFIERS = lazy_re.compile(pattern, re.VERBOSE)
| |
| |
# Seed the known system identifiers with the resource IDs predefined by
# Windows.  This runs at import time, so SYSTEM_IDENTIFIERS is already a
# usable regexp once the module has been loaded.
SetupSystemIdentifiers((
  'IDOK', 'IDCANCEL', 'IDC_STATIC', 'IDYES', 'IDNO',
  'ID_FILE_NEW', 'ID_FILE_OPEN', 'ID_FILE_CLOSE', 'ID_FILE_SAVE',
  'ID_FILE_SAVE_AS', 'ID_FILE_PAGE_SETUP', 'ID_FILE_PRINT_SETUP',
  'ID_FILE_PRINT', 'ID_FILE_PRINT_DIRECT', 'ID_FILE_PRINT_PREVIEW',
  'ID_FILE_UPDATE', 'ID_FILE_SAVE_COPY_AS', 'ID_FILE_SEND_MAIL',
  'ID_FILE_MRU_FIRST', 'ID_FILE_MRU_LAST',
  'ID_EDIT_CLEAR', 'ID_EDIT_CLEAR_ALL', 'ID_EDIT_COPY',
  'ID_EDIT_CUT', 'ID_EDIT_FIND', 'ID_EDIT_PASTE', 'ID_EDIT_PASTE_LINK',
  'ID_EDIT_PASTE_SPECIAL', 'ID_EDIT_REPEAT', 'ID_EDIT_REPLACE',
  'ID_EDIT_SELECT_ALL', 'ID_EDIT_UNDO', 'ID_EDIT_REDO',
  'VS_VERSION_INFO', 'IDRETRY',
  'ID_APP_ABOUT', 'ID_APP_EXIT',
  'ID_NEXT_PANE', 'ID_PREV_PANE',
  'ID_WINDOW_NEW', 'ID_WINDOW_ARRANGE', 'ID_WINDOW_CASCADE',
  'ID_WINDOW_TILE_HORZ', 'ID_WINDOW_TILE_VERT', 'ID_WINDOW_SPLIT',
  'ATL_IDS_SCSIZE', 'ATL_IDS_SCMOVE', 'ATL_IDS_SCMINIMIZE',
  'ATL_IDS_SCMAXIMIZE', 'ATL_IDS_SCNEXTWINDOW', 'ATL_IDS_SCPREVWINDOW',
  'ATL_IDS_SCCLOSE', 'ATL_IDS_SCRESTORE', 'ATL_IDS_SCTASKLIST',
  'ATL_IDS_MDICHILD', 'ATL_IDS_IDLEMESSAGE', 'ATL_IDS_MRU_FILE' ))
| |
| |
# Matches character entities, whether specified by name, decimal or hex.
# Exactly one of the named groups 'decimal', 'hex' and 'named' is set for a
# match; the terminating ';' is required.  Matching is case-insensitive.
_HTML_ENTITY = lazy_re.compile(
  '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));',
  re.IGNORECASE)
| |
# Matches characters that should be HTML-escaped.  This is ", <, > and &, but
# the & only if it is not the start of an HTML character entity.
#
# The lookahead recognizes the same entity shapes as _HTML_ENTITY: the
# alternatives are grouped so that the terminating ';' is required for all of
# them (not just for named entities), hex digits are limited to 0-9a-f, and
# entity names may contain digits (e.g. &sup2; or &frac12;).
_HTML_CHARS_TO_ESCAPE = lazy_re.compile(
  '"|<|>|&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z0-9]+);)',
  re.IGNORECASE | re.MULTILINE)
| |
| |
def ReadFile(filename, encoding):
  '''Reads and returns the entire contents of the given file.

  Line breaks (\\r\\n and \\r) are converted to \\n in every mode except
  BINARY.  For decoded text the conversion happens after decoding, so that
  multi-byte encodings such as UTF-16 are not corrupted by a byte-level
  newline translation.

  Args:
    filename: The path to the file.
    encoding: A Python codec name or one of two special values: BINARY to read
              the file in binary mode, or RAW_TEXT to read it with newline
              conversion but without decoding to Unicode.

  Returns:
    The raw bytes for BINARY, newline-normalized bytes for RAW_TEXT, and a
    newline-normalized unicode string otherwise.
  '''
  # Always read raw bytes; the 'rU' open mode translated newlines before
  # decoding and is no longer accepted by recent Python versions.
  with open(filename, 'rb') as f:
    data = f.read()
  if encoding == BINARY:
    return data
  if encoding == RAW_TEXT:
    return data.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
  text = data.decode(encoding)
  return text.replace(u'\r\n', u'\n').replace(u'\r', u'\n')
| |
| |
def WrapOutputStream(stream, encoding = 'utf-8'):
  '''Wraps |stream| in a writer that encodes the characters written to it
  with |encoding| before passing them on to |stream|.'''
  writer_factory = codecs.getwriter(encoding)
  return writer_factory(stream)
| |
| |
def ChangeStdoutEncoding(encoding = 'utf-8'):
  '''Replaces sys.stdout with a wrapper that writes characters using the
  specified encoding.'''
  encoded_stdout = WrapOutputStream(sys.stdout, encoding)
  sys.stdout = encoded_stdout
| |
| |
def EscapeHtml(text, escape_quotes = False):
  '''Returns 'text' with <, > and & (and optionally ") escaped to named HTML
  entities.  Any existing named entity or HTML entity defined by decimal or
  hex code will be left untouched.  This is appropriate for escaping text for
  inclusion in HTML, but not for XML.

  Args:
    text: the text to escape
    escape_quotes: if True, double quotes are escaped as well

  Return:
    The escaped text.
  '''
  replacements = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
  }
  if escape_quotes:
    replacements['"'] = '&quot;'

  def Replace(match):
    # The pattern also matches '"'; when quotes are not being escaped there
    # is no table entry for it and the character is returned unchanged.
    char = match.group()
    return replacements.get(char, char)

  return _HTML_CHARS_TO_ESCAPE.sub(Replace, text)
| |
| |
def UnescapeHtml(text, replace_nbsp=True):
  '''Returns 'text' with all HTML character entities (both named character
  entities and those specified by decimal or hexadecimal Unicode ordinal)
  replaced by their Unicode characters (or latin1 characters if possible).

  The only exception is that &nbsp; will not be unescaped if 'replace_nbsp'
  is False.  Unknown named entities, and numeric entities whose ordinal
  cannot be represented as a character, are left as they are.

  Args:
    text: the text to unescape
    replace_nbsp: whether &nbsp; should be replaced too

  Return:
    The unescaped text.
  '''
  def Replace(match):
    groups = match.groupdict()
    try:
      if groups['hex']:
        return unichr(int(groups['hex'], 16))
      if groups['decimal']:
        return unichr(int(groups['decimal'], 10))
    except (ValueError, OverflowError):
      # Ordinal outside the range unichr() supports - don't replace.
      return match.group()

    name = groups['named']
    assert name is not None
    if name == 'nbsp' and not replace_nbsp:
      return match.group()  # Don't replace
    codepoint = htmlentitydefs.name2codepoint.get(name)
    if codepoint is None:
      return match.group()  # Unknown HTML character entity - don't replace
    return unichr(codepoint)

  return _HTML_ENTITY.sub(Replace, text)
| |
| |
def EncodeCdata(cdata):
  '''Returns the provided cdata in either escaped format or <![CDATA[xxx]]>
  format, depending on which is more appropriate for easy editing.  The data
  is escaped for inclusion in an XML element's body.

  The CDATA form is used only when the text contains more than one '<' or
  more than one '>' and does not contain ']]>', which would terminate the
  CDATA section early.

  Args:
    cdata: 'If x < y and y < z then x < z'

  Return:
    '<![CDATA[If x < y and y < z then x < z]]>'
  '''
  many_brackets = cdata.count('<') > 1 or cdata.count('>') > 1
  # The parentheses matter: without them 'and' binds tighter than 'or', and
  # text with several '<' was wrapped even if it contained ']]>'.
  if many_brackets and ']]>' not in cdata:
    return '<![CDATA[%s]]>' % cdata
  return saxutils.escape(cdata)
| |
| |
def FixupNamedParam(function, param_name, param_value):
  '''Wraps 'function' so that the keyword argument 'param_name' defaults to
  'param_value' whenever the caller does not pass it explicitly.

  Args:
    function: callable
    param_name: 'bingo'
    param_value: 'bongo' (any type)

  Return:
    callable
  '''
  def FixupClosure(*args, **kw):
    # An explicitly supplied value always wins over the fixed-up default.
    kw.setdefault(param_name, param_value)
    return function(*args, **kw)
  return FixupClosure
| |
| |
def PathFromRoot(path):
  '''Resolves a path given relative to the GRIT root directory (the one that
  grit.py resides in) into a normalized path that can be used to open the
  file.

  Args:
    path: 'rel_dir/file.ext'

  Return:
    The normalized result of joining the root directory and 'path', e.g.
    '/src/tools/rel_dir/file.ext'
  '''
  joined = os.path.join(_root_dir, path)
  return os.path.normpath(joined)
| |
| |
def ParseGrdForUnittest(body, base_dir=None):
  '''Wraps |body| in a minimal .grd skeleton, parses it and returns the
  result, for use in unit tests.

  Args:
    body: XML that goes inside the <release> element.
    base_dir: The base_dir attribute of the <grit> tag.  Defaults to the
        GRIT root directory.
  '''
  import StringIO
  from grit import grd_reader
  if base_dir is None:
    base_dir = PathFromRoot('.')
  if isinstance(body, unicode):
    # The skeleton is a byte string, so the body has to be one as well.
    body = body.encode('utf-8')
  grd_text = '''<?xml version="1.0" encoding="UTF-8"?>
<grit latest_public_release="2" current_release="3" source_lang_id="en" base_dir="%s">
<outputs>
</outputs>
<release seq="3">
%s
</release>
</grit>''' % (base_dir, body)
  grd_stream = StringIO.StringIO(grd_text)
  return grd_reader.Parse(grd_stream, dir=".")
| |
| |
def StripBlankLinesAndComments(text):
  '''Removes empty lines and lines that start with '//' from C source code,
  for unit tests.  The remaining lines are joined with '\\n'.'''
  kept = []
  for line in text.splitlines():
    if not line or line.startswith('//'):
      continue
    kept.append(line)
  return '\n'.join(kept)
| |
| |
def dirname(filename):
  '''Like os.path.dirname(), except that '.' is returned where
  os.path.dirname() would return the empty string.
  '''
  parent = os.path.dirname(filename)
  return '.' if parent == '' else parent
| |
| |
def normpath(path):
  '''Like os.path.normpath(), but backward slashes are first turned into
  forward slashes, so that they are treated as separators when not running
  on Windows too.
  '''
  # Unconditional replacement is safe: on Windows os.path.normpath() turns
  # the forward slashes back into backward slashes.
  return os.path.normpath(path.replace('\\', '/'))
| |
| |
# Separators accepted between the parts of a language code: '-', '_' or '/'.
_LANGUAGE_SPLIT_RE = lazy_re.compile('-|_|/')
| |
| |
def CanonicalLanguage(code):
  '''Canonicalizes a language code: the parts are joined with dashes and
  every part after the first is upper-cased.  One-part language codes are
  returned unchanged.

  Args:
    code: 'zh_cn'

  Return:
    code: 'zh-CN'
  '''
  parts = _LANGUAGE_SPLIT_RE.split(code)
  # The first part keeps its case; only the following parts are upper-cased.
  canonical_parts = parts[:1] + [part.upper() for part in parts[1:]]
  return '-'.join(canonical_parts)
| |
| |
# Maps canonical language codes (see CanonicalLanguage()) to the number of
# the codepage used for that language.  LanguageToCodepage() falls back to
# 1252 for languages that are not listed here.
_LANG_TO_CODEPAGE = {
  'en' : 1252,
  'fr' : 1252,
  'it' : 1252,
  'de' : 1252,
  'es' : 1252,
  'nl' : 1252,
  'sv' : 1252,
  'no' : 1252,
  'da' : 1252,
  'fi' : 1252,
  'pt-BR' : 1252,
  'ru' : 1251,
  'ja' : 932,
  'zh-TW' : 950,
  'zh-CN' : 936,
  'ko' : 949,
}
| |
| |
def LanguageToCodepage(lang):
  '''Returns the codepage _number_ that can be used to represent 'lang', which
  may be either in formats such as 'en', 'pt_br', 'pt-BR', etc.

  The codepage returned will be one of the 'cpXXXX' codepage numbers.  For a
  language without a known codepage a note is printed and 1252 is returned.

  Args:
    lang: 'de'

  Return:
    1252
  '''
  lang = CanonicalLanguage(lang)
  if lang in _LANG_TO_CODEPAGE:
    return _LANG_TO_CODEPAGE[lang]
  # Parenthesized single-argument form: prints the same text under the
  # Python 2 print statement and is also valid as a Python 3 function call.
  print("Not sure which codepage to use for %s, assuming cp1252" % lang)
  return 1252
| |
def NewClassInstance(class_name, class_type):
  '''Imports the module named in class_name and instantiates the class.

  Args:
    class_name: the fully qualified, dot separated package + classname,
    i.e. "my.package.name.MyClass". Short class names are not supported.
    class_type: the class or superclass this object must implement

  Return:
    An instance of the class, or None if class_name has no module part, the
    module has no such attribute, or the instance is not a class_type.
  '''
  module_name, unused_dot, short_name = class_name.rpartition('.')
  if not module_name:
    # No package part (or an empty one): short names are not supported.
    return None
  # The non-empty fromlist makes __import__ return the innermost module
  # rather than the top-level package.
  module = __import__(module_name, globals(), locals(), [''])
  if not hasattr(module, short_name):
    return None
  candidate = getattr(module, short_name)()
  return candidate if isinstance(candidate, class_type) else None
| |
| |
def FixLineEnd(text, line_end):
  '''Returns |text| with every line break (\\r\\n, \\r or \\n) replaced by
  |line_end|.'''
  # Collapse all variants to '\n' first ('\r\n' before the lone '\r'), then
  # expand '\n' to the requested line ending.
  normalized = text.replace('\r\n', '\n').replace('\r', '\n')
  return normalized.replace('\n', line_end)
| |
| |
def BoolToString(bool):
  '''Returns 'true' if the argument is truthy and 'false' otherwise.'''
  return 'true' if bool else 'false'
| |
| |
# Module-level verbosity flags, both off by default.  They are read back
# through IsVerbose() and IsExtraVerbose().
verbose = False
extra_verbose = False
| |
def IsVerbose():
  '''Returns the current value of the module-level 'verbose' flag.'''
  return verbose
| |
def IsExtraVerbose():
  '''Returns the current value of the module-level 'extra_verbose' flag.'''
  return extra_verbose
| |
def ParseDefine(define):
  '''Splits a define argument into its name and value.

  The format is either "NAME=VAL" or "NAME"; a define without a value gets
  the value True.  Values of "1" and "0" are transformed to True and False
  respectively.  Surrounding whitespace is removed from both name and value.

  Args:
    define: a string of the form "NAME=VAL" or "NAME".

  Returns:
    A (name, value) pair. name is a string, value a string or boolean.
  '''
  # Only the first '=' separates name and value; later ones belong to the
  # value.
  name, equals, raw_value = define.partition('=')
  name = name.strip()
  if not equals:
    return (name, True)
  value = raw_value.strip()
  return (name, {'1': True, '0': False}.get(value, value))
| |
| |
class Substituter(object):
  '''Finds and substitutes variable names in text strings.

  Given a dictionary of variable names and values, prepares to
  search for patterns of the form [VAR_NAME] in a text.
  The value will be substituted back efficiently.
  Also applies to tclib.Message objects.
  '''

  def __init__(self):
    '''Create an empty substituter.'''
    # Maps variable name -> replacement value.
    self.substitutions_ = {}
    # True whenever the cached regular expression needs to be rebuilt.
    self.dirty_ = True

  def AddSubstitutions(self, subs):
    '''Add new values to the substitutor.

    Args:
      subs: A dictionary of new substitutions.
    '''
    self.substitutions_.update(subs)
    self.dirty_ = True

  def AddMessages(self, messages, lang):
    '''Adds substitutions extracted from node.Message objects.

    Args:
      messages: a list of node.Message objects.
      lang: The translation language to use in substitutions.
    '''
    subs = [(str(msg.attrs['name']), msg.Translate(lang)) for msg in messages]
    # AddSubstitutions() also marks the cached expression as stale.
    self.AddSubstitutions(dict(subs))

  def GetExp(self):
    '''Obtain a regular expression that will find substitution keys in text.

    Create and cache if the substituter has been updated. Use the cached value
    otherwise. Keys will be enclosed in [square brackets] in text.

    Returns:
      A regular expression object.
    '''
    if self.dirty_:
      # Keys are matched literally, so they are escaped: a key containing
      # regexp metacharacters would otherwise never match its own
      # [bracketed] form, or could even make the expression invalid.
      components = [r'\[%s\]' % re.escape(k) for k in self.substitutions_]
      # The capturing group makes split() keep the matched keys in its
      # result.
      self.exp = re.compile('(%s)' % '|'.join(components))
      self.dirty_ = False
    return self.exp

  def Substitute(self, text):
    '''Substitute the variable values in the given text.

    Text of the form [message_name] will be replaced by the message's value.

    Args:
      text: A string of text.

    Returns:
      A string of text with substitutions done.
    '''
    return ''.join([self._SubFragment(f) for f in self.GetExp().split(text)])

  def _SubFragment(self, fragment):
    '''Utility function for Substitute.

    Performs a simple substitution if the fragment is exactly of the form
    [message_name].

    Args:
      fragment: A simple string.

    Returns:
      A string with the substitution done.
    '''
    if len(fragment) > 2 and fragment[0] == '[' and fragment[-1] == ']':
      sub = self.substitutions_.get(fragment[1:-1], None)
      if sub is not None:
        return sub
    return fragment

  def SubstituteMessage(self, msg):
    '''Apply substitutions to a tclib.Message object.

    Text of the form [message_name] will be replaced by a new placeholder,
    whose presentation will take the form the message_name_{UsageCount}, and
    whose example will be the message's value. Existing placeholders are
    not affected.

    Args:
      msg: A tclib.Message object.

    Returns:
      A tclib.Message object, with substitutions done.
    '''
    from grit import tclib  # avoid circular import
    counts = {}
    text = msg.GetPresentableContent()
    placeholders = []
    pieces = []
    for f in self.GetExp().split(text):
      sub = self._SubFragment(f)
      if f != sub:
        # The fragment is a known [key]: number its occurrences so that
        # every placeholder gets a unique name.
        f = str(f)
        count = counts.get(f, 0) + 1
        counts[f] = count
        name = "%s_%d" % (f[1:-1], count)
        placeholders.append(tclib.Placeholder(name, f, sub))
        pieces.append(name)
      else:
        pieces.append(f)
    if placeholders:
      return tclib.Message(''.join(pieces),
                           msg.GetPlaceholders() + placeholders,
                           msg.GetDescription(), msg.GetMeaning())
    else:
      return msg
| |
| |
class TempDir(object):
  '''Creates a temporary directory populated with the given files, for unit
  testing.  Can be used as a context manager, in which case the directory
  is removed on exit.
  '''

  class _AsCurrentDirClass(object):
    '''Context manager that changes into a directory on entry and back to
    the previous working directory on exit.'''
    def __init__(self, path):
      self.path = path

    def __enter__(self):
      self.oldpath = os.getcwd()
      os.chdir(self.path)

    def __exit__(self, *exc_info):
      os.chdir(self.oldpath)

  def __init__(self, file_data):
    '''Creates the directory and writes the files.

    Args:
      file_data: a dictionary mapping file names, relative to the temporary
          directory, to the contents to write into them.
    '''
    self._tmp_dir_name = tempfile.mkdtemp()
    assert not os.listdir(self.GetPath())
    for relative_name, contents in file_data.items():
      target = self.GetPath(relative_name)
      # Create intermediate directories for names such as 'sub/file.txt'.
      parent = os.path.split(target)[0]
      if not os.path.exists(parent):
        os.makedirs(parent)
      with open(target, 'w') as out:
        out.write(contents)

  def __enter__(self):
    return self

  def __exit__(self, *exc_info):
    self.CleanUp()

  def CleanUp(self):
    '''Deletes the temporary directory and everything in it.'''
    shutil.rmtree(self.GetPath())

  def GetPath(self, name=''):
    '''Returns the path of |name| inside the temporary directory, or of the
    directory itself when no name is given.'''
    full_path = os.path.join(self._tmp_dir_name, name)
    # Guards against names (e.g. absolute ones) that would make the joined
    # path no longer start with the temporary directory.
    assert full_path.startswith(self._tmp_dir_name)
    return full_path

  def AsCurrentDir(self):
    '''Returns a context manager that makes the temporary directory the
    current working directory for the duration of the 'with' block.'''
    return self._AsCurrentDirClass(self.GetPath())