| #!/usr/bin/env python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| '''The 'grit rc2grd' tool.''' |
| |
| |
| import os.path |
| import getopt |
| import re |
| import StringIO |
| import types |
| |
| import grit.node.empty |
| from grit.node import include |
| from grit.node import structure |
| from grit.node import message |
| |
| from grit.gather import rc |
| from grit.gather import tr_html |
| |
| from grit.tool import interface |
| from grit.tool import postprocess_interface |
| from grit.tool import preprocess_interface |
| |
| from grit import grd_reader |
| from grit import lazy_re |
| from grit import tclib |
| from grit import util |
| |
| |
# All patterns below are written as raw strings so that regex escapes such as
# \s, \t, \n, \. and \$ reach the regex engine verbatim instead of relying on
# Python's handling of (unknown) string-literal escapes.

# Matches files referenced from an .rc file, i.e. a line of the form
#   ID TYPE "filename"
# Groups: 'id', 'type' and 'file' (the still RC-escaped file name).
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from the line carrying the
# ID up to and including the first following line consisting of END.
_DIALOG = lazy_re.compile(
  r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
  re.MULTILINE | re.DOTALL)


# Matches a menu section
_MENU = lazy_re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)


# Matches a versioninfo section
_VERSIONINFO = lazy_re.compile(
  r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
  re.MULTILINE | re.DOTALL)


# Matches a stringtable; the text between BEGIN and END is captured in the
# 'body' group.
_STRING_TABLE = lazy_re.compile(
  (r'^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
   r'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
  re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = lazy_re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.
_COMMENT_TEXT = lazy_re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and
# FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
  r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?'  # printf up to last char
  r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'               # printf last char
  r'|\$[1-9][0-9]*)')                                       # FormatMessage
| |
| |
class Rc2Grd(interface.Tool):
  '''A tool for converting .rc files to .grd files.  This tool is only for
converting the source (nontranslated) .rc file to a .grd file.  For importing
existing translations, use the rc2xtb tool.

Usage:  grit [global options] rc2grd [OPTIONS] RCFILE

The tool takes a single argument, which is the path to the .rc file to convert.
It outputs a .grd file with the same name in the same directory as the .rc file.
The .grd file may have one or more TODO comments for things that have to be
cleaned up manually.

OPTIONS may be any of the following:

  -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.

  -h TYPE        Specify the TYPE attribute for HTML structures.
                 Default is 'tr_html'.

  -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.

  -n MATCH       Specify the regular expression to match in comments that will
                 indicate that the resource the comment belongs to is not
                 translateable. Default is 'Not locali(s|z)able'.

  -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
                 any placeholders that otherwise would have had TODO names.
                 This attempts to find an identical message in the GRDFILE
                 and uses that instead of the automatically placeholderized
                 message.

  --pre CLASS    Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PreProcessor, to
                 run on the text of the RC file before conversion occurs.
                 This can be used to support constructs in the RC files
                 that GRIT cannot handle on its own.

  --post CLASS   Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PostProcessor, to
                 run on the text of the converted RC file.
                 This can be used to alter the content of the RC file
                 based on the conversion that occurred.

For menus, dialogs and version info, the .grd file will refer to the original
.rc file.  Once conversion is complete, you can strip the original .rc file
of its string table and all comments as these will be available in the .grd
file.

Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
Therefore, if your .rc file contains sections like this, you should run the
C preprocessor on the .rc file or manually edit it before using this tool.
'''

  def ShortDescription(self):
    '''Returns a one-line description of this tool.'''
    return 'A tool for converting .rc source files to .grd files.'

  def __init__(self):
    '''Sets every option to the default documented in the class docstring.'''
    self.input_encoding = 'cp1252'   # -e: encoding of the .rc file
    self.html_type = 'tr_html'       # -h: 'type' used for HTML structures
    self.html_encoding = 'utf-8'     # -u: encoding of referenced HTML files
    # -n: comments matching this mark a message as not translateable.
    self.not_localizable_re = re.compile('Not locali(s|z)able')
    self.role_model = None           # -r: parsed role-model .grd, if any
    self.pre_process = None          # --pre: preprocessor class name, if any
    self.post_process = None         # --post: postprocessor class name, if any

  def ParseOptions(self, args):
    '''Given a list of arguments, set this object's options and return
    all non-option arguments.

    Args:
      args: list of tool-specific command-line arguments

    Return:
      The arguments that remain once all recognized options are consumed.
    '''
    # Every short option takes a value, hence the ':' after each letter.  In
    # particular '-r' needs it, otherwise GRDFILE is never handed to us and is
    # instead left behind as a positional argument.
    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
    for (key, val) in own_opts:
      if key == '-e':
        self.input_encoding = val
      elif key == '-h':
        self.html_type = val
      elif key == '-u':
        self.html_encoding = val
      elif key == '-n':
        self.not_localizable_re = re.compile(val)
      elif key == '-r':
        self.role_model = grd_reader.Parse(val)
      elif key == '--pre':
        self.pre_process = val
      elif key == '--post':
        self.post_process = val
    return args

  def Run(self, opts, args):
    '''Converts the .rc file named in 'args' and writes the .grd next to it.

    The output file has the same base name as the input, with the extension
    replaced by '.grd', and is written as UTF-8.

    Args:
      opts: global options, passed on to self.SetOptions()
      args: tool-specific arguments (options followed by the .rc path)

    Return:
      2 if the number of non-option arguments is not exactly one; nothing is
      returned explicitly after a successful conversion.
    '''
    args = self.ParseOptions(args)
    if len(args) != 1:
      print('This tool takes a single tool-specific argument, the path to the\n'
            '.rc file to process.')
      return 2
    self.SetOptions(opts)

    path = args[0]
    out_name = os.path.splitext(os.path.basename(path))[0] + '.grd'
    out_path = os.path.join(util.dirname(path), out_name)

    rctext = util.ReadFile(path, self.input_encoding)
    grd_text = unicode(self.Process(rctext, path))
    with util.WrapOutputStream(open(out_path, 'w'), 'utf-8') as outfile:
      outfile.write(grd_text)

    print('Wrote output file %s.\nPlease check for TODO items in the file.' %
          out_path)

  def Process(self, rctext, rc_path):
    '''Processes 'rctext' and returns a resource tree corresponding to it.

    The optional preprocessor (--pre) is applied to the text first, and the
    optional postprocessor (--post) to the finished tree last.  If either
    class cannot be instantiated, a message is output and that step skipped.

    Args:
      rctext: complete text of the rc file
      rc_path: path of the rc file, e.g. 'resource/resource.rc'

    Return:
      grit.node.base.Node subclass
    '''

    if self.pre_process:
      preprocess_class = util.NewClassInstance(self.pre_process,
                                               preprocess_interface.PreProcessor)
      if preprocess_class:
        rctext = preprocess_class.Process(rctext, rc_path)
      else:
        self.Out(
          'PreProcessing class could not be found. Skipping preprocessing.\n')

    # Start with a basic skeleton for the .grd file
    root = grd_reader.Parse(StringIO.StringIO(
      '''<?xml version="1.0" encoding="UTF-8"?>
      <grit base_dir="." latest_public_release="0"
          current_release="1" source_lang_id="en">
        <outputs />
        <translations />
        <release seq="1">
          <includes />
          <structures />
          <messages />
        </release>
      </grit>'''), util.dirname(rc_path))
    # children[2] is the <release> element of the skeleton above; its three
    # children are the containers that get filled in below.
    release = root.children[2]
    includes = release.children[0]
    structures = release.children[1]
    messages = release.children[2]
    assert (isinstance(includes, grit.node.empty.IncludesNode) and
            isinstance(structures, grit.node.empty.StructuresNode) and
            isinstance(messages, grit.node.empty.MessagesNode))

    self.AddIncludes(rctext, includes)
    self.AddStructures(rctext, structures, os.path.basename(rc_path))
    self.AddMessages(rctext, messages)

    self.VerboseOut('Validating that all IDs are unique...\n')
    root.ValidateUniqueIds()
    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

    if self.post_process:
      postprocess_class = util.NewClassInstance(self.post_process,
          postprocess_interface.PostProcessor)
      if postprocess_class:
        root = postprocess_class.Process(rctext, rc_path, root)
      else:
        self.Out(
          'PostProcessing class could not be found. Skipping postprocessing.\n')

    return root

  def IsHtml(self, res_type, fname):
    '''Check whether both the type and file extension indicate HTML

    Args:
      res_type: resource type as found in the .rc file, e.g. 'HTML'
      fname: name of the file the resource refers to

    Return:
      True if res_type is 'HTML' and the text after the last '.' in fname is
      'htm' or 'html' (compared case-insensitively), False otherwise.
    '''
    fext = fname.split('.')[-1].lower()
    return res_type == 'HTML' and fext in ('htm', 'html')

  def AddIncludes(self, rctext, node):
    '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
    adds each included resource as an <include> child node of 'node'.

    File references recognized as HTML by IsHtml() are skipped here; they are
    handled by AddStructures().

    Args:
      rctext: complete text of the rc file
      node: node that receives the <include> children
    '''
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      assert '\n' not in fname
      if not self.IsHtml(res_type, fname):
        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                        (res_type, res_id, fname))
        node.AddChild(
            include.IncludeNode.Construct(node, res_id, res_type, fname))

  def AddStructures(self, rctext, node, rc_filename):
    '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
    information resources and HTML templates) and adds each as a <structure>
    child of 'node'.

    Args:
      rctext: complete text of the rc file
      node: node that receives the <structure> children
      rc_filename: file name that the menu, dialog and version structures
                   refer to
    '''
    # First add HTML includes
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      # Test the resource type parsed from the .rc file (not the builtin
      # 'type'); otherwise no HTML file would ever be recognized.
      if self.IsHtml(res_type, fname):
        node.AddChild(structure.StructureNode.Construct(
          node, res_id, self.html_type, fname, self.html_encoding))

    # Then add all RC includes
    def AddStructure(structure_type, structure_id):
      self.VerboseOut('Processing %s with ID %s\n' %
                      (structure_type, structure_id))
      node.AddChild(structure.StructureNode.Construct(
          node, structure_id, structure_type, rc_filename,
          encoding=self.input_encoding))

    for m in _MENU.finditer(rctext):
      AddStructure('menu', m.group('id'))
    for m in _DIALOG.finditer(rctext):
      AddStructure('dialog', m.group('id'))
    for m in _VERSIONINFO.finditer(rctext):
      AddStructure('version', m.group('id'))

  def AddMessages(self, rctext, node):
    '''Scans 'rctext' for all messages in string tables, preprocesses them as
    much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
    type format specifiers get those specifiers replaced with placeholders, and
    HTML-formatted messages get run through the HTML-placeholderizer).  Adds
    each message as a <message> node child of 'node'.

    Args:
      rctext: complete text of the rc file
      node: node that receives the <message> children
    '''
    for tm in _STRING_TABLE.finditer(rctext):
      table = tm.group('body')
      for mm in _MESSAGE.finditer(table):
        # Join the text of all comment lines preceding the message into a
        # single description string.
        comment_block = mm.group('comment')
        comment_text = ' '.join(
            cm.group('text') for cm in _COMMENT_TEXT.finditer(comment_block))

        msg_id = mm.group('id')
        text = rc.Section.UnEscape(mm.group('text'))

        self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))

        msg_obj = self.Placeholderize(text)

        # Messages that contain only placeholders do not need translation.
        is_translateable = any(
            isinstance(item, types.StringTypes) and
            not _WHITESPACE_ONLY.match(item)
            for item in msg_obj.GetContent())

        if self.not_localizable_re.search(comment_text):
          is_translateable = False

        message_meaning = ''
        internal_comment = ''

        # If we have a "role model" (existing GRD file) and this node exists
        # in the role model, use the description, meaning and translateable
        # attributes from the role model.
        if self.role_model:
          role_node = self.role_model.GetNodeById(msg_id)
          if role_node:
            is_translateable = role_node.IsTranslateable()
            message_meaning = role_node.attrs['meaning']
            comment_text = role_node.attrs['desc']
            internal_comment = role_node.attrs['internal_comment']

        # For nontranslateable messages, we don't want the complexity of
        # placeholderizing everything.
        if not is_translateable:
          msg_obj = tclib.Message(text=text)

        msg_node = message.MessageNode.Construct(node, msg_obj, msg_id,
                                                 desc=comment_text,
                                                 translateable=is_translateable,
                                                 meaning=message_meaning)
        msg_node.attrs['internal_comment'] = internal_comment

        node.AddChild(msg_node)
        self.ExtraVerboseOut('Done processing message %s\n' % msg_id)

  def Placeholderize(self, text):
    '''Creates a tclib.Message object from 'text', attempting to recognize
    a few different formats of text that can be automatically placeholderized
    (HTML code, printf-style format strings, and FormatMessage-style format
    strings).

    Args:
      text: the (already unescaped) message text

    Return:
      The message produced by HTML placeholderizing if that yielded at least
      one placeholder.  Otherwise a message in which every format specifier is
      a placeholder named 'TODO_0001', 'TODO_0002', ..., or, when a role model
      is set and contains a matching message, that role model message.

    Raises:
      Any exception raised while processing is re-raised after the offending
      text has been printed.
    '''

    try:
      # First try HTML placeholderizing.
      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
      msg = tr_html.HtmlToMessage(text, True)
      for item in msg.GetContent():
        if not isinstance(item, types.StringTypes):
          return msg  # Contained at least one placeholder, so we're done

      # HTML placeholderization didn't do anything, so try to find printf or
      # FormatMessage format specifiers and change them into placeholders.
      msg = tclib.Message()
      # The pattern has one capturing group, so split() returns the format
      # specifiers interleaved with the plain text around them.
      parts = _FORMAT_SPECIFIER.split(text)
      todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
      for part in parts:
        if _FORMAT_SPECIFIER.match(part):
          msg.AppendPlaceholder(tclib.Placeholder(
            'TODO_%04d' % todo_counter, part, 'TODO'))
          todo_counter += 1
        elif part != '':
          msg.AppendText(part)

      if self.role_model and len(parts) > 1:  # there are TODO placeholders
        role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
          msg.GetRealContent(), '')
        if role_model_msg:
          # replace wholesale to get placeholder names and examples
          msg = role_model_msg

      return msg
    except Exception:
      # Report which message was being processed, then let the error
      # propagate unchanged.
      print('Exception processing message with text "%s"' % text)
      raise
| |