grit/tool/rc2grd.py - platform/external/chromium_org/tools/grit - Git at Google

 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 '''The 'grit rc2grd' tool.'''


 import os.path
 import getopt
 import re
 import StringIO
 import types

 import grit.node.empty
 from grit.node import include
 from grit.node import structure
 from grit.node import message

 from grit.gather import rc
 from grit.gather import tr_html

 from grit.tool import interface
 from grit.tool import postprocess_interface
 from grit.tool import preprocess_interface

 from grit import grd_reader
 from grit import lazy_re
 from grit import tclib
 from grit import util


 # Matches files referenced from an .rc file: a resource ID, a resource type
 # and a double-quoted file name, separated by spaces/tabs on a single line.
 # The trailing ([^"]|"") alternative lets the quoted text end in a doubled
 # quote ("").  Named groups: 'id', 'type', 'file'.
 _FILE_REF = lazy_re.compile('''
   ^(?P<id>[A-Z_0-9.]+)[ \t]+
   (?P<type>[A-Z_0-9]+)[ \t]+
   "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


 # Matches a dialog section: '<ID> DIALOG' or '<ID> DIALOGEX', followed
 # (non-greedily, across lines) by a BEGIN line and the first END line after it.
 # Named group: 'id'.
 _DIALOG = lazy_re.compile(
     '^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
     re.MULTILINE | re.DOTALL)


 # Matches a menu section: '<ID> MENU...' up to the first END line that
 # follows a BEGIN line.  Named group: 'id'.
 _MENU = lazy_re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                         re.MULTILINE | re.DOTALL)


 # Matches a versioninfo section: '<ID> VERSIONINFO' up to the first END line
 # that follows a BEGIN line.  Named group: 'id'.
 _VERSIONINFO = lazy_re.compile(
     '^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
     re.MULTILINE | re.DOTALL)


 # Matches a stringtable: the STRINGTABLE keyword, any number of PRELOAD,
 # DISCARDABLE, CHARACTERISTICS, LANGUAGE or VERSION options, then BEGIN ... END.
 # The text between BEGIN and END is captured in the named group 'body'.
 _STRING_TABLE = lazy_re.compile(
     ('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
      'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
     re.MULTILINE | re.DOTALL)


 # Matches each message inside a stringtable, breaking it up into comments,
 # the ID of the message, and the (RC-escaped) message text.
 # Named groups: 'comment', 'id', 'text'.  The pattern is compiled with
 # re.VERBOSE, so the '#' remarks inside the literal are part of the pattern
 # source and are ignored by the regex engine.
 _MESSAGE = lazy_re.compile('''
   (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
   ^\s*
   (?P<id>[A-Za-z0-9_]+)  # id
   \s+
   "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
   ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


 # Matches each line of comment text in a multi-line comment.  The leading
 # whitespace and the '//' marker are skipped; the rest of the line is captured
 # in the named group 'text'.
 _COMMENT_TEXT = lazy_re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


 # Matches a string that is empty or all whitespace
 _WHITESPACE_ONLY = lazy_re.compile('\A\s*\Z', re.MULTILINE)


 # Finds printf and FormatMessage style format specifiers
 # Uses non-capturing groups except for the outermost group, so the output of
 # re.split() should include both the normal text and what we intend to
 # replace with placeholders.
 # The printf branch accepts: '%', at most one flag out of '-', '#', ' ', '+',
 # an optional width (digits or '*'), an optional '.precision' (digits or '*'),
 # an optional length modifier (h, l or L) and a conversion character.
 # The other branch accepts '$' followed by a positive decimal number.
 # TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
 _FORMAT_SPECIFIER = lazy_re.compile(
   '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
   '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'              # printf last char
   '|\$[1-9][0-9]*)')                                     # FormatMessage


 class Rc2Grd(interface.Tool):
   '''A tool for converting .rc files to .grd files.  This tool is only for
 converting the source (nontranslated) .rc file to a .grd file.  For importing
 existing translations, use the rc2xtb tool.

 Usage:  grit [global options] rc2grd [OPTIONS] RCFILE

 The tool takes a single argument, which is the path to the .rc file to convert.
 It outputs a .grd file with the same name in the same directory as the .rc file.
 The .grd file may have one or more TODO comments for things that have to be
 cleaned up manually.

 OPTIONS may be any of the following:

   -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.

   -h TYPE        Specify the TYPE attribute for HTML structures.
                  Default is 'tr_html'.

   -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.

   -n MATCH       Specify the regular expression to match in comments that will
                  indicate that the resource the comment belongs to is not
                  translateable. Default is 'Not locali(s|z)able'.

   -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
                  any placeholders that otherwise would have had TODO names.
                  This attempts to find an identical message in the GRDFILE
                  and uses that instead of the automatically placeholderized
                  message.

   --pre CLASS    Specify an optional, fully qualified classname, which
                  has to be a subclass of grit.tool.PreProcessor, to
                  run on the text of the RC file before conversion occurs.
                  This can be used to support constructs in the RC files
                  that GRIT cannot handle on its own.

   --post CLASS   Specify an optional, fully qualified classname, which
                  has to be a subclass of grit.tool.PostProcessor, to
                  run on the text of the converted RC file.
                  This can be used to alter the content of the RC file
                  based on the conversion that occurred.

 For menus, dialogs and version info, the .grd file will refer to the original
 .rc file.  Once conversion is complete, you can strip the original .rc file
 of its string table and all comments as these will be available in the .grd
 file.

 Note that this tool WILL NOT obey C preprocessor rules, so even if something
 is #if 0-ed out it will still be included in the output of this tool.
 Therefore, if your .rc file contains sections like this, you should run the
 C preprocessor on the .rc file or manually edit it before using this tool.
 '''

   def ShortDescription(self):
     '''Returns a one-line description of this tool.'''
     return 'A tool for converting .rc source files to .grd files.'

   def __init__(self):
     # Defaults for the tool-specific options; ParseOptions() overrides them.
     self.input_encoding = 'cp1252'  # -e: encoding of the .rc file
     self.html_type = 'tr_html'  # -h: type used for HTML structures
     self.html_encoding = 'utf-8'  # -u: encoding of referenced HTML files
     # -n: a message whose comment matches this is marked not translateable
     self.not_localizable_re = re.compile('Not locali(s|z)able')
     self.role_model = None  # -r: parsed "role model" .grd file, if given
     self.pre_process = None  # --pre: class name of an optional preprocessor
     self.post_process = None  # --post: class name of an optional postprocessor

   def ParseOptions(self, args):
     '''Given a list of arguments, set this object's options and return
     all non-option arguments.

     Args:
       args: list of command-line arguments that follow the tool name

     Return:
       The list of arguments left over once the options have been consumed.
     '''
     # Every short option, including -r (GRDFILE), takes a value, so each
     # letter must be followed by ':'.  Without the colon after 'r', getopt
     # hands back an empty value for -r and treats GRDFILE as a positional
     # argument.
     (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
     for (key, val) in own_opts:
       if key == '-e':
         self.input_encoding = val
       elif key == '-h':
         self.html_type = val
       elif key == '-u':
         self.html_encoding = val
       elif key == '-n':
         self.not_localizable_re = re.compile(val)
       elif key == '-r':
         self.role_model = grd_reader.Parse(val)
       elif key == '--pre':
         self.pre_process = val
       elif key == '--post':
         self.post_process = val
     return args

   def Run(self, opts, args):
     '''Converts the .rc file named in 'args' and writes the .grd file next to it.

     Args:
       opts: global options, passed on to self.SetOptions()
       args: tool-specific arguments; after option parsing exactly one must
             remain, the path to the .rc file

     Return:
       2 if the number of non-option arguments is not exactly one; otherwise
       nothing is returned explicitly.
     '''
     args = self.ParseOptions(args)
     if len(args) != 1:
       print('This tool takes a single tool-specific argument, the path to the\n'
             '.rc file to process.')
       return 2
     self.SetOptions(opts)

     path = args[0]
     # Same directory and base name as the input, with a .grd extension.
     grd_name = os.path.splitext(os.path.basename(path))[0] + '.grd'
     out_path = os.path.join(util.dirname(path), grd_name)

     rctext = util.ReadFile(path, self.input_encoding)
     grd_text = unicode(self.Process(rctext, path))
     with util.WrapOutputStream(open(out_path, 'w'), 'utf-8') as outfile:
       outfile.write(grd_text)

     print('Wrote output file %s.\nPlease check for TODO items in the file.' %
           out_path)


   def Process(self, rctext, rc_path):
     '''Processes 'rctext' and returns a resource tree corresponding to it.

     Args:
       rctext: complete text of the rc file
       rc_path: path of the rc file, e.g. 'resource\\resource.rc'

     Return:
       grit.node.base.Node subclass
     '''

     if self.pre_process:
       preprocessor = util.NewClassInstance(self.pre_process,
                                            preprocess_interface.PreProcessor)
       if preprocessor:
         rctext = preprocessor.Process(rctext, rc_path)
       else:
         self.Out(
           'PreProcessing class could not be found. Skipping preprocessing.\n')

     # Start with a basic skeleton for the .grd file
     root = grd_reader.Parse(StringIO.StringIO(
       '''<?xml version="1.0" encoding="UTF-8"?>
       <grit base_dir="." latest_public_release="0"
           current_release="1" source_lang_id="en">
         <outputs />
         <translations />
         <release seq="1">
           <includes />
           <structures />
           <messages />
         </release>
       </grit>'''), util.dirname(rc_path))
     # children[2] is the <release> element of the skeleton above; its three
     # children are the containers that get filled in below.
     release = root.children[2]
     includes = release.children[0]
     structures = release.children[1]
     messages = release.children[2]
     assert (isinstance(includes, grit.node.empty.IncludesNode) and
             isinstance(structures, grit.node.empty.StructuresNode) and
             isinstance(messages, grit.node.empty.MessagesNode))

     self.AddIncludes(rctext, includes)
     self.AddStructures(rctext, structures, os.path.basename(rc_path))
     self.AddMessages(rctext, messages)

     self.VerboseOut('Validating that all IDs are unique...\n')
     root.ValidateUniqueIds()
     self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

     if self.post_process:
       postprocessor = util.NewClassInstance(self.post_process,
                                             postprocess_interface.PostProcessor)
       if postprocessor:
         root = postprocessor.Process(rctext, rc_path, root)
       else:
         self.Out(
           'PostProcessing class could not be found. Skipping postprocessing.\n')

     return root


   def IsHtml(self, res_type, fname):
     '''Check whether both the type and file extension indicate HTML'''
     fext = fname.split('.')[-1].lower()
     return res_type == 'HTML' and fext in ('htm', 'html')


   def AddIncludes(self, rctext, node):
     '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
     adds each included resource as an <include> child node of 'node'.
     File references that IsHtml() recognizes are skipped here; AddStructures()
     handles those.'''
     for m in _FILE_REF.finditer(rctext):
       res_id = m.group('id')
       res_type = m.group('type').upper()
       fname = rc.Section.UnEscape(m.group('file'))
       assert '\n' not in fname
       if not self.IsHtml(res_type, fname):
         self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                         (res_type, res_id, fname))
         node.AddChild(
           include.IncludeNode.Construct(node, res_id, res_type, fname))


   def AddStructures(self, rctext, node, rc_filename):
     '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
     information resources and HTML templates) and adds each as a <structure>
     child of 'node'.

     Args:
       rctext: complete text of the rc file
       node: the node the new <structure> children are added to
       rc_filename: file name recorded on each menu/dialog/version structure
     '''
     # First add HTML includes
     for m in _FILE_REF.finditer(rctext):
       res_id = m.group('id')
       res_type = m.group('type').upper()
       fname = rc.Section.UnEscape(m.group('file'))
       # Must test the parsed resource type (not the builtin 'type'), otherwise
       # no HTML file is ever recognized.
       if self.IsHtml(res_type, fname):
         node.AddChild(structure.StructureNode.Construct(
           node, res_id, self.html_type, fname, self.html_encoding))

     # Then add all RC includes
     def AddStructure(res_type, res_id):
       self.VerboseOut('Processing %s with ID %s\n' % (res_type, res_id))
       node.AddChild(structure.StructureNode.Construct(node, res_id, res_type,
                                                       rc_filename,
                                                       encoding=self.input_encoding))
     for m in _MENU.finditer(rctext):
       AddStructure('menu', m.group('id'))
     for m in _DIALOG.finditer(rctext):
       AddStructure('dialog', m.group('id'))
     for m in _VERSIONINFO.finditer(rctext):
       AddStructure('version', m.group('id'))


   def AddMessages(self, rctext, node):
     '''Scans 'rctext' for all messages in string tables, preprocesses them as
     much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
     type format specifiers get those specifiers replaced with placeholders, and
     HTML-formatted messages get run through the HTML-placeholderizer).  Adds
     each message as a <message> node child of 'node'.'''
     for tm in _STRING_TABLE.finditer(rctext):
       table = tm.group('body')
       for mm in _MESSAGE.finditer(table):
         # Join the text of all comment lines preceding the message into a
         # single description string.
         comment_text = ' '.join(
           cm.group('text')
           for cm in _COMMENT_TEXT.finditer(mm.group('comment')))

         msg_id = mm.group('id')
         text = rc.Section.UnEscape(mm.group('text'))

         self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))

         msg_obj = self.Placeholderize(text)

         # Messages that contain only placeholders do not need translation.
         is_translateable = any(
           isinstance(item, types.StringTypes) and
           not _WHITESPACE_ONLY.match(item)
           for item in msg_obj.GetContent())

         if self.not_localizable_re.search(comment_text):
           is_translateable = False

         message_meaning = ''
         internal_comment = ''

         # If we have a "role model" (existing GRD file) and this node exists
         # in the role model, use the description, meaning and translateable
         # attributes from the role model.
         if self.role_model:
           role_node = self.role_model.GetNodeById(msg_id)
           if role_node:
             is_translateable = role_node.IsTranslateable()
             message_meaning = role_node.attrs['meaning']
             comment_text = role_node.attrs['desc']
             internal_comment = role_node.attrs['internal_comment']

         # For nontranslateable messages, we don't want the complexity of
         # placeholderizing everything.
         if not is_translateable:
           msg_obj = tclib.Message(text=text)

         msg_node = message.MessageNode.Construct(node, msg_obj, msg_id,
                                                  desc=comment_text,
                                                  translateable=is_translateable,
                                                  meaning=message_meaning)
         msg_node.attrs['internal_comment'] = internal_comment

         node.AddChild(msg_node)
         self.ExtraVerboseOut('Done processing message %s\n' % msg_id)


   def Placeholderize(self, text):
     '''Creates a tclib.Message object from 'text', attempting to recognize
     a few different formats of text that can be automatically placeholderized
     (HTML code, printf-style format strings, and FormatMessage-style format
     strings).

     Any exception raised while processing is reported together with the
     offending text and then re-raised.
     '''

     try:
       # First try HTML placeholderizing.
       # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
       msg = tr_html.HtmlToMessage(text, True)
       for item in msg.GetContent():
         if not isinstance(item, types.StringTypes):
           return msg  # Contained at least one placeholder, so we're done

       # HTML placeholderization didn't do anything, so try to find printf or
       # FormatMessage format specifiers and change them into placeholders.
       msg = tclib.Message()
       # The pattern has one capturing group, so split() returns the plain
       # text pieces interleaved with the matched specifiers.
       parts = _FORMAT_SPECIFIER.split(text)
       todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
       for part in parts:
         if _FORMAT_SPECIFIER.match(part):
           msg.AppendPlaceholder(tclib.Placeholder(
             'TODO_%04d' % todo_counter, part, 'TODO'))
           todo_counter += 1
         elif part != '':
           msg.AppendText(part)

       if self.role_model and len(parts) > 1:  # there are TODO placeholders
         role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
           msg.GetRealContent(), '')
         if role_model_msg:
           # replace wholesale to get placeholder names and examples
           msg = role_model_msg

       return msg
     except:
       # Deliberately catches everything: the error is only annotated with the
       # message text here and always propagated to the caller.
       print('Exception processing message with text "%s"' % text)
       raise
	#!/usr/bin/env python
	# Copyright (c) 2012 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	'''The 'grit rc2grd' tool.'''


	import os.path
	import getopt
	import re
	import StringIO
	import types

	import grit.node.empty
	from grit.node import include
	from grit.node import structure
	from grit.node import message

	from grit.gather import rc
	from grit.gather import tr_html

	from grit.tool import interface
	from grit.tool import postprocess_interface
	from grit.tool import preprocess_interface

	from grit import grd_reader
	from grit import lazy_re
	from grit import tclib
	from grit import util


	# Matches files referenced from an .rc file: a resource ID, a resource type
	# and a double-quoted file name, separated by spaces/tabs on a single line.
	# The trailing ([^"]|"") alternative lets the quoted text end in a doubled
	# quote ("").  Named groups: 'id', 'type', 'file'.
	_FILE_REF = lazy_re.compile('''
	  ^(?P<id>[A-Z_0-9.]+)[ \t]+
	  (?P<type>[A-Z_0-9]+)[ \t]+
	  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


	# Matches a dialog section: '<ID> DIALOG' or '<ID> DIALOGEX', followed
	# (non-greedily, across lines) by a BEGIN line and the first END line after it.
	# Named group: 'id'.
	_DIALOG = lazy_re.compile(
	    '^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
	    re.MULTILINE | re.DOTALL)


	# Matches a menu section: '<ID> MENU...' up to the first END line that
	# follows a BEGIN line.  Named group: 'id'.
	_MENU = lazy_re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
	                        re.MULTILINE | re.DOTALL)


	# Matches a versioninfo section: '<ID> VERSIONINFO' up to the first END line
	# that follows a BEGIN line.  Named group: 'id'.
	_VERSIONINFO = lazy_re.compile(
	    '^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
	    re.MULTILINE | re.DOTALL)


	# Matches a stringtable: the STRINGTABLE keyword, any number of PRELOAD,
	# DISCARDABLE, CHARACTERISTICS, LANGUAGE or VERSION options, then BEGIN ... END.
	# The text between BEGIN and END is captured in the named group 'body'.
	_STRING_TABLE = lazy_re.compile(
	    ('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
	     'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
	    re.MULTILINE | re.DOTALL)


	# Matches each message inside a stringtable, breaking it up into comments,
	# the ID of the message, and the (RC-escaped) message text.
	# Named groups: 'comment', 'id', 'text'.  The pattern is compiled with
	# re.VERBOSE, so the '#' remarks inside the literal are part of the pattern
	# source and are ignored by the regex engine.
	_MESSAGE = lazy_re.compile('''
	  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
	  ^\s*
	  (?P<id>[A-Za-z0-9_]+)  # id
	  \s+
	  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
	  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


	# Matches each line of comment text in a multi-line comment.  The leading
	# whitespace and the '//' marker are skipped; the rest of the line is captured
	# in the named group 'text'.
	_COMMENT_TEXT = lazy_re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


	# Matches a string that is empty or all whitespace
	_WHITESPACE_ONLY = lazy_re.compile('\A\s*\Z', re.MULTILINE)


	# Finds printf and FormatMessage style format specifiers
	# Uses non-capturing groups except for the outermost group, so the output of
	# re.split() should include both the normal text and what we intend to
	# replace with placeholders.
	# The printf branch accepts: '%', at most one flag out of '-', '#', ' ', '+',
	# an optional width (digits or '*'), an optional '.precision' (digits or '*'),
	# an optional length modifier (h, l or L) and a conversion character.
	# The other branch accepts '$' followed by a positive decimal number.
	# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
	_FORMAT_SPECIFIER = lazy_re.compile(
	  '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
	  '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'              # printf last char
	  '|\$[1-9][0-9]*)')                                     # FormatMessage


	class Rc2Grd(interface.Tool):
	  '''A tool for converting .rc files to .grd files.  This tool is only for
	converting the source (nontranslated) .rc file to a .grd file.  For importing
	existing translations, use the rc2xtb tool.

	Usage:  grit [global options] rc2grd [OPTIONS] RCFILE

	The tool takes a single argument, which is the path to the .rc file to convert.
	It outputs a .grd file with the same name in the same directory as the .rc file.
	The .grd file may have one or more TODO comments for things that have to be
	cleaned up manually.

	OPTIONS may be any of the following:

	  -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.

	  -h TYPE        Specify the TYPE attribute for HTML structures.
	                 Default is 'tr_html'.

	  -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.

	  -n MATCH       Specify the regular expression to match in comments that will
	                 indicate that the resource the comment belongs to is not
	                 translateable. Default is 'Not locali(s|z)able'.

	  -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
	                 any placeholders that otherwise would have had TODO names.
	                 This attempts to find an identical message in the GRDFILE
	                 and uses that instead of the automatically placeholderized
	                 message.

	  --pre CLASS    Specify an optional, fully qualified classname, which
	                 has to be a subclass of grit.tool.PreProcessor, to
	                 run on the text of the RC file before conversion occurs.
	                 This can be used to support constructs in the RC files
	                 that GRIT cannot handle on its own.

	  --post CLASS   Specify an optional, fully qualified classname, which
	                 has to be a subclass of grit.tool.PostProcessor, to
	                 run on the text of the converted RC file.
	                 This can be used to alter the content of the RC file
	                 based on the conversion that occurred.

	For menus, dialogs and version info, the .grd file will refer to the original
	.rc file.  Once conversion is complete, you can strip the original .rc file
	of its string table and all comments as these will be available in the .grd
	file.

	Note that this tool WILL NOT obey C preprocessor rules, so even if something
	is #if 0-ed out it will still be included in the output of this tool.
	Therefore, if your .rc file contains sections like this, you should run the
	C preprocessor on the .rc file or manually edit it before using this tool.
	'''

	  def ShortDescription(self):
	    '''Returns a one-line description of this tool.'''
	    return 'A tool for converting .rc source files to .grd files.'

	  def __init__(self):
	    # Defaults for the tool-specific options; ParseOptions() overrides them.
	    self.input_encoding = 'cp1252'  # -e: encoding of the .rc file
	    self.html_type = 'tr_html'  # -h: type used for HTML structures
	    self.html_encoding = 'utf-8'  # -u: encoding of referenced HTML files
	    # -n: a message whose comment matches this is marked not translateable
	    self.not_localizable_re = re.compile('Not locali(s|z)able')
	    self.role_model = None  # -r: parsed "role model" .grd file, if given
	    self.pre_process = None  # --pre: class name of an optional preprocessor
	    self.post_process = None  # --post: class name of an optional postprocessor

	  def ParseOptions(self, args):
	    '''Given a list of arguments, set this object's options and return
	    all non-option arguments.

	    Args:
	      args: list of command-line arguments that follow the tool name

	    Return:
	      The list of arguments left over once the options have been consumed.
	    '''
	    # Every short option, including -r (GRDFILE), takes a value, so each
	    # letter must be followed by ':'.  Without the colon after 'r', getopt
	    # hands back an empty value for -r and treats GRDFILE as a positional
	    # argument.
	    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
	    for (key, val) in own_opts:
	      if key == '-e':
	        self.input_encoding = val
	      elif key == '-h':
	        self.html_type = val
	      elif key == '-u':
	        self.html_encoding = val
	      elif key == '-n':
	        self.not_localizable_re = re.compile(val)
	      elif key == '-r':
	        self.role_model = grd_reader.Parse(val)
	      elif key == '--pre':
	        self.pre_process = val
	      elif key == '--post':
	        self.post_process = val
	    return args

	  def Run(self, opts, args):
	    '''Converts the .rc file named in 'args' and writes the .grd file next to it.

	    Args:
	      opts: global options, passed on to self.SetOptions()
	      args: tool-specific arguments; after option parsing exactly one must
	            remain, the path to the .rc file

	    Return:
	      2 if the number of non-option arguments is not exactly one; otherwise
	      nothing is returned explicitly.
	    '''
	    args = self.ParseOptions(args)
	    if len(args) != 1:
	      print('This tool takes a single tool-specific argument, the path to the\n'
	            '.rc file to process.')
	      return 2
	    self.SetOptions(opts)

	    path = args[0]
	    # Same directory and base name as the input, with a .grd extension.
	    grd_name = os.path.splitext(os.path.basename(path))[0] + '.grd'
	    out_path = os.path.join(util.dirname(path), grd_name)

	    rctext = util.ReadFile(path, self.input_encoding)
	    grd_text = unicode(self.Process(rctext, path))
	    with util.WrapOutputStream(open(out_path, 'w'), 'utf-8') as outfile:
	      outfile.write(grd_text)

	    print('Wrote output file %s.\nPlease check for TODO items in the file.' %
	          out_path)


	  def Process(self, rctext, rc_path):
	    '''Processes 'rctext' and returns a resource tree corresponding to it.

	    Args:
	      rctext: complete text of the rc file
	      rc_path: path of the rc file, e.g. 'resource\\resource.rc'

	    Return:
	      grit.node.base.Node subclass
	    '''

	    if self.pre_process:
	      preprocessor = util.NewClassInstance(self.pre_process,
	                                           preprocess_interface.PreProcessor)
	      if preprocessor:
	        rctext = preprocessor.Process(rctext, rc_path)
	      else:
	        self.Out(
	          'PreProcessing class could not be found. Skipping preprocessing.\n')

	    # Start with a basic skeleton for the .grd file
	    root = grd_reader.Parse(StringIO.StringIO(
	      '''<?xml version="1.0" encoding="UTF-8"?>
	      <grit base_dir="." latest_public_release="0"
	          current_release="1" source_lang_id="en">
	        <outputs />
	        <translations />
	        <release seq="1">
	          <includes />
	          <structures />
	          <messages />
	        </release>
	      </grit>'''), util.dirname(rc_path))
	    # children[2] is the <release> element of the skeleton above; its three
	    # children are the containers that get filled in below.
	    release = root.children[2]
	    includes = release.children[0]
	    structures = release.children[1]
	    messages = release.children[2]
	    assert (isinstance(includes, grit.node.empty.IncludesNode) and
	            isinstance(structures, grit.node.empty.StructuresNode) and
	            isinstance(messages, grit.node.empty.MessagesNode))

	    self.AddIncludes(rctext, includes)
	    self.AddStructures(rctext, structures, os.path.basename(rc_path))
	    self.AddMessages(rctext, messages)

	    self.VerboseOut('Validating that all IDs are unique...\n')
	    root.ValidateUniqueIds()
	    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

	    if self.post_process:
	      postprocessor = util.NewClassInstance(self.post_process,
	                                            postprocess_interface.PostProcessor)
	      if postprocessor:
	        root = postprocessor.Process(rctext, rc_path, root)
	      else:
	        self.Out(
	          'PostProcessing class could not be found. Skipping postprocessing.\n')

	    return root


	  def IsHtml(self, res_type, fname):
	    '''Check whether both the type and file extension indicate HTML'''
	    fext = fname.split('.')[-1].lower()
	    return res_type == 'HTML' and fext in ('htm', 'html')


	  def AddIncludes(self, rctext, node):
	    '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
	    adds each included resource as an <include> child node of 'node'.
	    File references that IsHtml() recognizes are skipped here; AddStructures()
	    handles those.'''
	    for m in _FILE_REF.finditer(rctext):
	      res_id = m.group('id')
	      res_type = m.group('type').upper()
	      fname = rc.Section.UnEscape(m.group('file'))
	      assert '\n' not in fname
	      if not self.IsHtml(res_type, fname):
	        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
	                        (res_type, res_id, fname))
	        node.AddChild(
	          include.IncludeNode.Construct(node, res_id, res_type, fname))


	  def AddStructures(self, rctext, node, rc_filename):
	    '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
	    information resources and HTML templates) and adds each as a <structure>
	    child of 'node'.

	    Args:
	      rctext: complete text of the rc file
	      node: the node the new <structure> children are added to
	      rc_filename: file name recorded on each menu/dialog/version structure
	    '''
	    # First add HTML includes
	    for m in _FILE_REF.finditer(rctext):
	      res_id = m.group('id')
	      res_type = m.group('type').upper()
	      fname = rc.Section.UnEscape(m.group('file'))
	      # Must test the parsed resource type (not the builtin 'type'), otherwise
	      # no HTML file is ever recognized.
	      if self.IsHtml(res_type, fname):
	        node.AddChild(structure.StructureNode.Construct(
	          node, res_id, self.html_type, fname, self.html_encoding))

	    # Then add all RC includes
	    def AddStructure(res_type, res_id):
	      self.VerboseOut('Processing %s with ID %s\n' % (res_type, res_id))
	      node.AddChild(structure.StructureNode.Construct(node, res_id, res_type,
	                                                      rc_filename,
	                                                      encoding=self.input_encoding))
	    for m in _MENU.finditer(rctext):
	      AddStructure('menu', m.group('id'))
	    for m in _DIALOG.finditer(rctext):
	      AddStructure('dialog', m.group('id'))
	    for m in _VERSIONINFO.finditer(rctext):
	      AddStructure('version', m.group('id'))


	  def AddMessages(self, rctext, node):
	    '''Scans 'rctext' for all messages in string tables, preprocesses them as
	    much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
	    type format specifiers get those specifiers replaced with placeholders, and
	    HTML-formatted messages get run through the HTML-placeholderizer).  Adds
	    each message as a <message> node child of 'node'.'''
	    for tm in _STRING_TABLE.finditer(rctext):
	      table = tm.group('body')
	      for mm in _MESSAGE.finditer(table):
	        # Join the text of all comment lines preceding the message into a
	        # single description string.
	        comment_text = ' '.join(
	          cm.group('text')
	          for cm in _COMMENT_TEXT.finditer(mm.group('comment')))

	        msg_id = mm.group('id')
	        text = rc.Section.UnEscape(mm.group('text'))

	        self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))

	        msg_obj = self.Placeholderize(text)

	        # Messages that contain only placeholders do not need translation.
	        is_translateable = any(
	          isinstance(item, types.StringTypes) and
	          not _WHITESPACE_ONLY.match(item)
	          for item in msg_obj.GetContent())

	        if self.not_localizable_re.search(comment_text):
	          is_translateable = False

	        message_meaning = ''
	        internal_comment = ''

	        # If we have a "role model" (existing GRD file) and this node exists
	        # in the role model, use the description, meaning and translateable
	        # attributes from the role model.
	        if self.role_model:
	          role_node = self.role_model.GetNodeById(msg_id)
	          if role_node:
	            is_translateable = role_node.IsTranslateable()
	            message_meaning = role_node.attrs['meaning']
	            comment_text = role_node.attrs['desc']
	            internal_comment = role_node.attrs['internal_comment']

	        # For nontranslateable messages, we don't want the complexity of
	        # placeholderizing everything.
	        if not is_translateable:
	          msg_obj = tclib.Message(text=text)

	        msg_node = message.MessageNode.Construct(node, msg_obj, msg_id,
	                                                 desc=comment_text,
	                                                 translateable=is_translateable,
	                                                 meaning=message_meaning)
	        msg_node.attrs['internal_comment'] = internal_comment

	        node.AddChild(msg_node)
	        self.ExtraVerboseOut('Done processing message %s\n' % msg_id)


	  def Placeholderize(self, text):
	    '''Creates a tclib.Message object from 'text', attempting to recognize
	    a few different formats of text that can be automatically placeholderized
	    (HTML code, printf-style format strings, and FormatMessage-style format
	    strings).

	    Any exception raised while processing is reported together with the
	    offending text and then re-raised.
	    '''

	    try:
	      # First try HTML placeholderizing.
	      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
	      msg = tr_html.HtmlToMessage(text, True)
	      for item in msg.GetContent():
	        if not isinstance(item, types.StringTypes):
	          return msg  # Contained at least one placeholder, so we're done

	      # HTML placeholderization didn't do anything, so try to find printf or
	      # FormatMessage format specifiers and change them into placeholders.
	      msg = tclib.Message()
	      # The pattern has one capturing group, so split() returns the plain
	      # text pieces interleaved with the matched specifiers.
	      parts = _FORMAT_SPECIFIER.split(text)
	      todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
	      for part in parts:
	        if _FORMAT_SPECIFIER.match(part):
	          msg.AppendPlaceholder(tclib.Placeholder(
	            'TODO_%04d' % todo_counter, part, 'TODO'))
	          todo_counter += 1
	        elif part != '':
	          msg.AppendText(part)

	      if self.role_model and len(parts) > 1:  # there are TODO placeholders
	        role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
	          msg.GetRealContent(), '')
	        if role_model_msg:
	          # replace wholesale to get placeholder names and examples
	          msg = role_model_msg

	      return msg
	    except:
	      # Deliberately catches everything: the error is only annotated with the
	      # message text here and always propagated to the caller.
	      print('Exception processing message with text "%s"' % text)
	      raise