blob: 10d36f600b0edae030136768681dc88ce6d5cd9d [file] [log] [blame]
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''The 'grit rc2grd' tool.'''
import os.path
import getopt
import re
import StringIO
import types
import grit.node.empty
from grit.node import include
from grit.node import structure
from grit.node import message
from grit.gather import rc
from grit.gather import tr_html
from grit.tool import interface
from grit.tool import postprocess_interface
from grit.tool import preprocess_interface
from grit import grd_reader
from grit import lazy_re
from grit import tclib
from grit import util
# Matches files referenced from an .rc file
# Named groups: 'id' (the resource ID, which may contain dots), 'type' (the
# resource type keyword) and 'file' (the text between the double quotes, still
# RC-escaped; the callers in this file pass it through rc.Section.UnEscape).
# The quoted text is allowed to end in a doubled quote ("").
_FILE_REF = lazy_re.compile('''
^(?P<id>[A-Z_0-9.]+)[ \t]+
(?P<type>[A-Z_0-9]+)[ \t]+
"(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)
# Matches a dialog section
# Covers both DIALOG and DIALOGEX, from the line carrying the ID through the
# first line consisting of END. Named group 'id' is the resource ID.
_DIALOG = lazy_re.compile(
'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
re.MULTILINE | re.DOTALL)
# Matches a menu section
# Named group 'id' is the resource ID. No whitespace is required right after
# MENU, so a keyword that merely starts with MENU also matches.
_MENU = lazy_re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
re.MULTILINE | re.DOTALL)
# Matches a versioninfo section
# Named group 'id' is the resource ID.
_VERSIONINFO = lazy_re.compile(
'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
re.MULTILINE | re.DOTALL)
# Matches a stringtable
# Named group 'body' is everything between the BEGIN line and the first line
# consisting of END; the optional attributes after STRINGTABLE are skipped.
_STRING_TABLE = lazy_re.compile(
('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
re.MULTILINE | re.DOTALL)
# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
# Named groups: 'comment', 'id' and 'text'.
_MESSAGE = lazy_re.compile('''
(?P<comment>(^\s+//.+?)*) # 0 or more lines of comments preceding the message
^\s*
(?P<id>[A-Za-z0-9_]+) # id
\s+
"(?P<text>.*?([^"]|""))"([^"]|$) # The message itself
''', re.MULTILINE | re.DOTALL | re.VERBOSE)
# Matches each line of comment text in a multi-line comment.
# Named group 'text' is the line with the leading '//' and surrounding
# whitespace removed.
_COMMENT_TEXT = lazy_re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)
# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile('\A\s*\Z', re.MULTILINE)
# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)' # printf last char
'|\$[1-9][0-9]*)') # FormatMessage
class Rc2Grd(interface.Tool):
'''A tool for converting .rc files to .grd files. This tool is only for
converting the source (nontranslated) .rc file to a .grd file. For importing
existing translations, use the rc2xtb tool.
Usage: grit [global options] rc2grd [OPTIONS] RCFILE
The tool takes a single argument, which is the path to the .rc file to convert.
It outputs a .grd file with the same name in the same directory as the .rc file.
The .grd file may have one or more TODO comments for things that have to be
cleaned up manually.
OPTIONS may be any of the following:
-e ENCODING Specify the ENCODING of the .rc file. Default is 'cp1252'.
-h TYPE Specify the TYPE attribute for HTML structures.
Default is 'tr_html'.
-u ENCODING Specify the ENCODING of HTML files. Default is 'utf-8'.
-n MATCH Specify the regular expression to match in comments that will
indicate that the resource the comment belongs to is not
translateable. Default is 'Not locali(s|z)able'.
-r GRDFILE Specify that GRDFILE should be used as a "role model" for
any placeholders that otherwise would have had TODO names.
This attempts to find an identical message in the GRDFILE
and uses that instead of the automatically placeholderized
message.
--pre CLASS Specify an optional, fully qualified classname, which
has to be a subclass of grit.tool.PreProcessor, to
run on the text of the RC file before conversion occurs.
This can be used to support constructs in the RC files
that GRIT cannot handle on its own.
--post CLASS Specify an optional, fully qualified classname, which
has to be a subclass of grit.tool.PostProcessor, to
run on the text of the converted RC file.
This can be used to alter the content of the RC file
based on the conversion that occurred.
For menus, dialogs and version info, the .grd file will refer to the original
.rc file. Once conversion is complete, you can strip the original .rc file
of its string table and all comments as these will be available in the .grd
file.
Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
Therefore, if your .rc file contains sections like this, you should run the
C preprocessor on the .rc file or manually edit it before using this tool.
'''
def ShortDescription(self):
  '''Returns the one-line summary of what this tool does.'''
  summary = 'A tool for converting .rc source files to .grd files.'
  return summary
def __init__(self):
self.input_encoding = 'cp1252'
self.html_type = 'tr_html'
self.html_encoding = 'utf-8'
self.not_localizable_re = re.compile('Not locali(s|z)able')
self.role_model = None
self.pre_process = None
self.post_process = None
def ParseOptions(self, args):
'''Given a list of arguments, set this object's options and return
all non-option arguments.
'''
(own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])
for (key, val) in own_opts:
if key == '-e':
self.input_encoding = val
elif key == '-h':
self.html_type = val
elif key == '-u':
self.html_encoding = val
elif key == '-n':
self.not_localizable_re = re.compile(val)
elif key == '-r':
self.role_model = grd_reader.Parse(val)
elif key == '--pre':
self.pre_process = val
elif key == '--post':
self.post_process = val
return args
def Run(self, opts, args):
args = self.ParseOptions(args)
if len(args) != 1:
print ('This tool takes a single tool-specific argument, the path to the\n'
'.rc file to process.')
return 2
self.SetOptions(opts)
path = args[0]
out_path = os.path.join(util.dirname(path),
os.path.splitext(os.path.basename(path))[0] + '.grd')
rctext = util.ReadFile(path, self.input_encoding)
grd_text = unicode(self.Process(rctext, path))
with util.WrapOutputStream(file(out_path, 'w'), 'utf-8') as outfile:
outfile.write(grd_text)
print 'Wrote output file %s.\nPlease check for TODO items in the file.' % out_path
def Process(self, rctext, rc_path):
  '''Builds the resource tree for the text of an .rc file.

  Args:
    rctext: complete text of the rc file
    rc_path: path of the rc file. Its directory is passed to
        grd_reader.Parse() and its base name to AddStructures().

  Return:
    The root node produced by grd_reader.Parse() once it has been populated,
    or whatever the post-processing class returned if one was requested and
    found.
  '''
  if self.pre_process:
    preprocessor = util.NewClassInstance(self.pre_process,
                                         preprocess_interface.PreProcessor)
    if not preprocessor:
      self.Out(
          'PreProcessing class could not be found. Skipping preprocessing.\n')
    else:
      rctext = preprocessor.Process(rctext, rc_path)

  # Start with a basic skeleton for the .grd file
  skeleton = '''<?xml version="1.0" encoding="UTF-8"?>
<grit base_dir="." latest_public_release="0"
current_release="1" source_lang_id="en">
<outputs />
<translations />
<release seq="1">
<includes />
<structures />
<messages />
</release>
</grit>'''
  root = grd_reader.Parse(StringIO.StringIO(skeleton), util.dirname(rc_path))

  # In the skeleton <release> is the third child of the root, and its three
  # children are the containers that get filled in below.
  includes = root.children[2].children[0]
  structures = root.children[2].children[1]
  messages = root.children[2].children[2]
  assert isinstance(includes, grit.node.empty.IncludesNode)
  assert isinstance(structures, grit.node.empty.StructuresNode)
  assert isinstance(messages, grit.node.empty.MessagesNode)

  self.AddIncludes(rctext, includes)
  self.AddStructures(rctext, structures, os.path.basename(rc_path))
  self.AddMessages(rctext, messages)

  self.VerboseOut('Validating that all IDs are unique...\n')
  root.ValidateUniqueIds()
  self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

  if self.post_process:
    postprocessor = util.NewClassInstance(self.post_process,
                                          postprocess_interface.PostProcessor)
    if not postprocessor:
      self.Out(
          'PostProcessing class could not be found. Skipping postprocessing.\n')
    else:
      root = postprocessor.Process(rctext, rc_path, root)
  return root
def IsHtml(self, res_type, fname):
  '''Tells whether a resource is HTML judging by both its type and filename.

  Args:
    res_type: resource type keyword; only the exact string 'HTML' qualifies.
    fname: filename; the text after its last '.' is compared
        case-insensitively against 'htm' and 'html'.

  Returns:
    True only if both the type and the extension indicate HTML.
  '''
  extension = fname.rsplit('.', 1)[-1].lower()
  if res_type != 'HTML':
    return False
  return extension in ('htm', 'html')
def AddIncludes(self, rctext, node):
  '''Adds an <include> child to 'node' for each included resource (e.g.
  BITMAP, ICON) found in 'rctext'.

  File references that IsHtml() classifies as HTML are skipped here.

  Args:
    rctext: complete text of the rc file
    node: the node that receives the new <include> children
  '''
  for ref in _FILE_REF.finditer(rctext):
    res_id = ref.group('id')
    res_type = ref.group('type').upper()
    fname = rc.Section.UnEscape(ref.group('file'))
    # A filename must never span lines.
    assert '\n' not in fname
    if self.IsHtml(res_type, fname):
      continue
    self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                    (res_type, res_id, fname))
    node.AddChild(
        include.IncludeNode.Construct(node, res_id, res_type, fname))
def AddStructures(self, rctext, node, rc_filename):
  '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
  information resources and HTML templates) and adds each as a <structure>
  child of 'node'.

  Args:
    rctext: complete text of the rc file
    node: the node that receives the new <structure> children
    rc_filename: filename recorded on the menu, dialog and version structures,
        which refer back to the .rc file itself
  '''
  # First add HTML includes
  for m in _FILE_REF.finditer(rctext):
    res_id = m.group('id')
    res_type = m.group('type').upper()
    fname = rc.Section.UnEscape(m.group('file'))
    # The check must receive the matched resource type. Passing the builtin
    # 'type' made it always fail, so no HTML structure was ever added.
    if self.IsHtml(res_type, fname):
      node.AddChild(structure.StructureNode.Construct(
          node, res_id, self.html_type, fname, self.html_encoding))

  # Then add all RC includes
  def AddStructure(res_type, res_id):
    self.VerboseOut('Processing %s with ID %s\n' % (res_type, res_id))
    node.AddChild(structure.StructureNode.Construct(
        node, res_id, res_type, rc_filename, encoding=self.input_encoding))

  # Same order as before: all menus, then all dialogs, then version info.
  for (res_type, pattern) in (('menu', _MENU),
                              ('dialog', _DIALOG),
                              ('version', _VERSIONINFO)):
    for m in pattern.finditer(rctext):
      AddStructure(res_type, m.group('id'))
def AddMessages(self, rctext, node):
  '''Adds a <message> child to 'node' for every message found in the string
  tables of 'rctext'.

  Each message is run through Placeholderize() so that $1, $2 or %s, %d type
  format specifiers and HTML markup become placeholders where possible.

  Args:
    rctext: complete text of the rc file
    node: the node that receives the new <message> children
  '''
  for table_match in _STRING_TABLE.finditer(rctext):
    for entry in _MESSAGE.finditer(table_match.group('body')):
      # Join the comment lines preceding the message into one description.
      comment_text = ' '.join(
          line.group('text')
          for line in _COMMENT_TEXT.finditer(entry.group('comment')))

      msg_id = entry.group('id')
      text = rc.Section.UnEscape(entry.group('text'))
      self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))
      msg_obj = self.Placeholderize(text)

      # Messages that contain only placeholders do not need translation, so
      # at least one text part must hold something other than whitespace.
      is_translateable = any(
          isinstance(item, types.StringTypes) and
          not _WHITESPACE_ONLY.match(item)
          for item in msg_obj.GetContent())
      if self.not_localizable_re.search(comment_text):
        is_translateable = False

      message_meaning = ''
      internal_comment = ''

      # If we have a "role model" (existing GRD file) and this node exists
      # in the role model, use the description, meaning and translateable
      # attributes from the role model.
      role_node = None
      if self.role_model:
        role_node = self.role_model.GetNodeById(msg_id)
      if role_node:
        is_translateable = role_node.IsTranslateable()
        message_meaning = role_node.attrs['meaning']
        comment_text = role_node.attrs['desc']
        internal_comment = role_node.attrs['internal_comment']

      # For nontranslateable messages, we don't want the complexity of
      # placeholderizing everything.
      if not is_translateable:
        msg_obj = tclib.Message(text=text)

      msg_node = message.MessageNode.Construct(
          node, msg_obj, msg_id,
          desc=comment_text,
          translateable=is_translateable,
          meaning=message_meaning)
      msg_node.attrs['internal_comment'] = internal_comment
      node.AddChild(msg_node)
      self.ExtraVerboseOut('Done processing message %s\n' % msg_id)
def Placeholderize(self, text):
  '''Turns 'text' into a message object, placeholderizing what it can.

  HTML placeholderizing is tried first. If that yields no placeholder, printf
  and FormatMessage style format specifiers are replaced by placeholders
  named 'TODO_0001', 'TODO_0002' and so on. When a role model is set and the
  text was split around at least one specifier, an identical message found in
  the role model is returned in place of the automatically built one.

  Args:
    text: the unescaped message text

  Returns:
    The resulting message object.
  '''
  try:
    # First try HTML placeholderizing.
    # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
    html_msg = tr_html.HtmlToMessage(text, True)
    if any(not isinstance(item, types.StringTypes)
           for item in html_msg.GetContent()):
      return html_msg  # Contained at least one placeholder, so we're done

    # HTML placeholderization didn't do anything, so try to find printf or
    # FormatMessage format specifiers and change them into placeholders.
    msg = tclib.Message()
    pieces = _FORMAT_SPECIFIER.split(text)
    next_todo = 1  # We make placeholder IDs 'TODO_0001' etc.
    for piece in pieces:
      if _FORMAT_SPECIFIER.match(piece):
        placeholder = tclib.Placeholder(
            'TODO_%04d' % next_todo, piece, 'TODO')
        msg.AppendPlaceholder(placeholder)
        next_todo += 1
      elif piece != '':
        msg.AppendText(piece)

    if self.role_model and len(pieces) > 1:  # there are TODO placeholders
      from_role_model = self.role_model.UberClique().BestCliqueByOriginalText(
          msg.GetRealContent(), '')
      if from_role_model:
        # replace wholesale to get placeholder names and examples
        msg = from_role_model
    return msg
  except:
    # Report which message was being processed, then let the error propagate.
    print('Exception processing message with text "%s"' % text)
    raise