allow EventLog tag numbers to be auto-assigned

With this change, you can specify "?" in place of a tag number in a
.logtags file and the build system will assign numbers to these tags.
(The numbers used shouldn't matter since we translate them back to tag
names whenever the logs are read back.)

This is pretty straightforward to do:

- make merge-event-log-tags.py assign numbers to any tags that specify
  "?"

- make the generated java files depend on the merged output

- make java-event-log-tags.py read both the original .logtags and the
  merged output, and fill in tag numbers for any "?" using the merged
  version.

Change-Id: Icc6ccd705db461d570fc929922a830aa6deaca48
diff --git a/core/base_rules.mk b/core/base_rules.mk
index 2b3e94b..c4e19b4 100644
--- a/core/base_rules.mk
+++ b/core/base_rules.mk
@@ -220,7 +220,7 @@
 logtags_java_sources := $(patsubst %.logtags,%.java,$(addprefix $(intermediates.COMMON)/src/, $(logtags_sources)))
 logtags_sources := $(addprefix $(TOP_DIR)$(LOCAL_PATH)/, $(logtags_sources))
 
-$(logtags_java_sources): $(intermediates.COMMON)/src/%.java: $(TOPDIR)$(LOCAL_PATH)/%.logtags
+$(logtags_java_sources): $(intermediates.COMMON)/src/%.java: $(TOPDIR)$(LOCAL_PATH)/%.logtags $(TARGET_OUT)/etc/event-log-tags
 	$(transform-logtags-to-java)
 
 endif
diff --git a/core/definitions.mk b/core/definitions.mk
index b9845f8..1afbb90 100644
--- a/core/definitions.mk
+++ b/core/definitions.mk
@@ -752,7 +752,7 @@
 define transform-logtags-to-java
 @mkdir -p $(dir $@)
 @echo "logtags: $@ <= $<"
-$(hide) $(JAVATAGS) -o $@ $<
+$(hide) $(JAVATAGS) -o $@ $^
 endef
 
 
diff --git a/tools/event_log_tags.py b/tools/event_log_tags.py
index 4e6d960..81e8b39 100644
--- a/tools/event_log_tags.py
+++ b/tools/event_log_tags.py
@@ -75,11 +75,14 @@
           self.options[parts[1]] = parts[2:]
           continue
 
-        try:
-          tag = int(parts[0])
-        except ValueError:
-          self.AddError("\"%s\" isn't an integer tag" % (parts[0],))
-          continue
+        if parts[0] == "?":
+          tag = None
+        else:
+          try:
+            tag = int(parts[0])
+          except ValueError:
+            self.AddError("\"%s\" isn't an integer tag or '?'" % (parts[0],))
+            continue
 
         tagname = parts[1]
         if len(parts) == 3:
diff --git a/tools/java-event-log-tags.py b/tools/java-event-log-tags.py
index 3713bd8..552021e 100755
--- a/tools/java-event-log-tags.py
+++ b/tools/java-event-log-tags.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 """
-Usage: java-event-log-tags.py [-o output_file] <input_file>
+Usage: java-event-log-tags.py [-o output_file] <input_file> <merged_tags_file>
 
 Generate a java class containing constants for each of the event log
 tags in the given input file.
@@ -50,14 +50,24 @@
     print >> sys.stderr, "unhandled option %s" % (o,)
     sys.exit(1)
 
-if len(args) != 1:
-  print "need exactly one input file, not %d" % (len(args),)
+if len(args) != 2:
+  print "need exactly two input files, not %d" % (len(args),)
   print __doc__
   sys.exit(1)
 
 fn = args[0]
 tagfile = event_log_tags.TagFile(fn)
 
+# Load the merged tag file (which should have numbers assigned for all
+# tags).  Use the numbers from the merged file to fill in any missing
+# numbers from the input file.
+merged_fn = args[1]
+merged_tagfile = event_log_tags.TagFile(merged_fn)
+merged_by_name = dict([(t.tagname, t) for t in merged_tagfile.tags])
+for t in tagfile.tags:
+  if t.tagnum is None:
+    t.tagnum = merged_by_name[t.tagname].tagnum
+
 if "java_package" not in tagfile.options:
   tagfile.AddError("java_package option not specified", linenum=0)
 
diff --git a/tools/merge-event-log-tags.py b/tools/merge-event-log-tags.py
index 2852612..c8f36fc 100755
--- a/tools/merge-event-log-tags.py
+++ b/tools/merge-event-log-tags.py
@@ -26,16 +26,22 @@
 
 import cStringIO
 import getopt
+import md5
+import struct
 import sys
 
 import event_log_tags
 
-by_tagnum = {}
 errors = []
 warnings = []
 
 output_file = None
 
+# Tags with a tag number of ? are assigned a tag in the range
+# [ASSIGN_START, ASSIGN_LIMIT).
+ASSIGN_START = 900000
+ASSIGN_LIMIT = 1000000
+
 try:
   opts, args = getopt.getopt(sys.argv[1:], "ho:")
 except getopt.GetoptError, err:
@@ -53,6 +59,18 @@
     print >> sys.stderr, "unhandled option %s" % (o,)
     sys.exit(1)
 
+# Restrictions on tags:
+#
+#   Tag names must be unique.  (If the tag number and description are
+#   also the same, a warning is issued instead of an error.)
+#
+#   Explicit tag numbers must be unique.  (If the tag name is also the
+#   same, no error is issued because the above rule will issue a
+#   warning or error.)
+
+by_tagname = {}
+by_tagnum = {}
+
 for fn in args:
   tagfile = event_log_tags.TagFile(fn)
 
@@ -61,24 +79,37 @@
     tagname = t.tagname
     description = t.description
 
-    if t.tagnum in by_tagnum:
-      orig = by_tagnum[t.tagnum]
+    if t.tagname in by_tagname:
+      orig = by_tagname[t.tagname]
 
-      if (t.tagname == orig.tagname and
+      if (t.tagnum == orig.tagnum and
           t.description == orig.description):
         # if the name and description are identical, issue a warning
         # instead of failing (to make it easier to move tags between
         # projects without breaking the build).
-        tagfile.AddWarning("tag %d \"%s\" duplicated in %s:%d" %
-                           (t.tagnum, t.tagname, orig.filename, orig.linenum),
+        tagfile.AddWarning("tag \"%s\" (%s) duplicated in %s:%d" %
+                           (t.tagname, t.tagnum, orig.filename, orig.linenum),
                            linenum=t.linenum)
       else:
-        tagfile.AddError("tag %d used by conflicting \"%s\" from %s:%d" %
-                         (t.tagnum, orig.tagname, orig.filename, orig.linenum),
-                         linenum=t.linenum)
+        tagfile.AddError(
+            "tag name \"%s\" used by conflicting tag %s from %s:%d" %
+            (t.tagname, orig.tagnum, orig.filename, orig.linenum),
+            linenum=t.linenum)
       continue
 
-    by_tagnum[t.tagnum] = t
+    if t.tagnum is not None and t.tagnum in by_tagnum:
+      orig = by_tagnum[t.tagnum]
+
+      if t.tagname != orig.tagname:
+        tagfile.AddError(
+            "tag number %d used by conflicting tag \"%s\" from %s:%d" %
+            (t.tagnum, orig.tagname, orig.filename, orig.linenum),
+            linenum=t.linenum)
+        continue
+
+    by_tagname[t.tagname] = t
+    if t.tagnum is not None:
+      by_tagnum[t.tagnum] = t
 
   errors.extend(tagfile.errors)
   warnings.extend(tagfile.warnings)
@@ -92,9 +123,31 @@
   for fn, ln, msg in warnings:
     print >> sys.stderr, "%s:%d: warning: %s" % (fn, ln, msg)
 
+# Python's hash function (a) isn't great and (b) varies between
+# versions of python.  Using md5 is overkill here but is the same from
+# platform to platform and speed shouldn't matter in practice.
+def hashname(str):
+  d = md5.md5(str).digest()[:4]
+  return struct.unpack("!I", d)[0]
+
+# Assign a tag number to all the entries that say they want one
+# assigned.  We do this based on a hash of the tag name so that the
+# numbers should stay relatively stable as tags are added.
+
+for name, t in sorted(by_tagname.iteritems()):
+  if t.tagnum is None:
+    while True:
+      x = (hashname(name) % (ASSIGN_LIMIT - ASSIGN_START)) + ASSIGN_START
+      if x not in by_tagnum:
+        t.tagnum = x
+        by_tagnum[x] = t
+        break
+      name = "_" + name
+
+# by_tagnum should be complete now; we've assigned numbers to all tags.
+
 buffer = cStringIO.StringIO()
-for n in sorted(by_tagnum):
-  t = by_tagnum[n]
+for n, t in sorted(by_tagnum.iteritems()):
   if t.description:
     buffer.write("%d %s %s\n" % (t.tagnum, t.tagname, t.description))
   else: