Checker: Allow don't-care output on a line

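This patch changes the behaviour of whitespace characters in CHECK
lines: whitespace now acts as a separator between line parts, allowing
for additional content between verified parts of the matched output
line. The first part, and every part that follows a separator, must
start at the beginning of the output line or be preceded by whitespace.
Tests therefore won't need to explicitly match attributes which are
not tested.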

The format in which attributes are printed ensures that each part of
the CHECK line is still matched against the intended part of the line.

Example:
  - output line:   i32 Div [ i4 i8 ] ( loop_header:null )
  - CHECK before:  Div [ {{i\d+}} {{i\d+}} ] ( loop_header:null )
  - CHECK now:     Div ( loop_header:null )

Change-Id: Icf6bacfb285ae288bea21640e860a871a94cc386
diff --git a/tools/checker.py b/tools/checker.py
index 5744c15..0bce236 100755
--- a/tools/checker.py
+++ b/tools/checker.py
@@ -128,7 +128,7 @@
     Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
     Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
     Logger.log(msg, Logger.Level.Error, out=sys.stderr)
-    sys.exit(1)
+    sys.exit(msg)
 
   @staticmethod
   def startTest(name):
@@ -162,7 +162,7 @@
 
   class Variant(object):
     """Supported language constructs."""
-    Text, Pattern, VarRef, VarDef = range(4)
+    Text, Pattern, VarRef, VarDef, Separator = range(5)
 
   rStartOptional = r"("
   rEndOptional = r")?"
@@ -187,6 +187,10 @@
     self.pattern = pattern
 
   @staticmethod
+  def newSeparator():
+    return CheckElement(CheckElement.Variant.Separator, None, None)
+
+  @staticmethod
   def parseText(text):
     return CheckElement(CheckElement.Variant.Text, None, re.escape(text))
 
@@ -261,11 +265,10 @@
       # If one of the above was identified at the current position, extract them
       # from the line, parse them and add to the list of line parts.
       if self.__isMatchAtStart(matchWhitespace):
-        # We want to be whitespace-agnostic so whenever a check line contains
-        # a whitespace, we add a regex pattern for an arbitrary non-zero number
-        # of whitespaces.
+        # A whitespace in the check line creates a new separator of line parts.
+        # This allows for ignored output between the previous and next parts.
         line = line[matchWhitespace.end():]
-        lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
+        lineParts.append(CheckElement.newSeparator())
       elif self.__isMatchAtStart(matchPattern):
         pattern = line[0:matchPattern.end()]
         line = line[matchPattern.end():]
@@ -298,49 +301,55 @@
     else:
       return linePart.pattern
 
+  def __isSeparated(self, outputLine, matchStart):
+    return (matchStart == 0) or (outputLine[matchStart - 1:matchStart].isspace())
+
   # Attempts to match the check line against a line from the output file with
   # the given initial variable values. It returns the new variable state if
   # successful and None otherwise.
   def match(self, outputLine, initialVarState):
-    initialSearchFrom = 0
-    initialPattern = self.__generatePattern(self.lineParts[0], initialVarState)
-    while True:
-      # Search for the first element on the regex parts list. This will mark
-      # the point on the line from which we will attempt to match the rest of
-      # the check pattern. If this iteration produces only a partial match,
-      # the next iteration will start searching further in the output.
-      firstMatch = re.search(initialPattern, outputLine[initialSearchFrom:])
-      if firstMatch is None:
-        return None
-      matchStart = initialSearchFrom + firstMatch.start()
-      initialSearchFrom += firstMatch.start() + 1
+    # Do the full matching on a shadow copy of the variable state. If the
+    # matching fails half-way, we will not need to revert the state.
+    varState = dict(initialVarState)
 
-      # Do the full matching on a shadow copy of the variable state. If the
-      # matching fails half-way, we will not need to revert the state.
-      varState = dict(initialVarState)
+    matchStart = 0
+    isAfterSeparator = True
 
-      # Now try to parse all of the parts of the check line in the right order.
-      # Variable values are updated on-the-fly, meaning that a variable can
-      # be referenced immediately after its definition.
-      fullyMatched = True
-      for part in self.lineParts:
-        pattern = self.__generatePattern(part, varState)
-        match = re.match(pattern, outputLine[matchStart:])
-        if match is None:
-          fullyMatched = False
-          break
+    # Now try to parse all of the parts of the check line in the right order.
+    # Variable values are updated on-the-fly, meaning that a variable can
+    # be referenced immediately after its definition.
+    for part in self.lineParts:
+      if part.variant == CheckElement.Variant.Separator:
+        isAfterSeparator = True
+        continue
+
+      # Find the earliest match for this line part.
+      pattern = self.__generatePattern(part, varState)
+      while True:
+        match = re.search(pattern, outputLine[matchStart:])
+        if (match is None) or (not isAfterSeparator and not self.__isMatchAtStart(match)):
+          return None
         matchEnd = matchStart + match.end()
-        if part.variant == CheckElement.Variant.VarDef:
-          if part.name in varState:
-            Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"",
-                              self.fileName, self.lineNo)
-          varState[part.name] = outputLine[matchStart:matchEnd]
-        matchStart = matchEnd
+        matchStart += match.start()
 
-      # Return the new variable state if all parts were successfully matched.
-      # Otherwise loop and try to find another start point on the same line.
-      if fullyMatched:
-        return varState
+        # Check if this is a valid match if we expect a whitespace separator
+        # before the matched text. Otherwise loop and look for another match.
+        if not isAfterSeparator or self.__isSeparated(outputLine, matchStart):
+          break
+        else:
+          matchStart += 1
+
+      if part.variant == CheckElement.Variant.VarDef:
+        if part.name in varState:
+          Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"",
+                            self.fileName, self.lineNo)
+        varState[part.name] = outputLine[matchStart:matchEnd]
+
+      matchStart = matchEnd
+      isAfterSeparator = False
+
+    # All parts were successfully matched. Return the new variable state.
+    return varState
 
 
 class CheckGroup(CommonEqualityMixin):
diff --git a/tools/checker_test.py b/tools/checker_test.py
index 18152b5..667ca90 100755
--- a/tools/checker_test.py
+++ b/tools/checker_test.py
@@ -60,8 +60,14 @@
 
 
 class TestCheckLine_Parse(unittest.TestCase):
+  def __getPartPattern(self, linePart):
+    if linePart.variant == checker.CheckElement.Variant.Separator:
+      return "\s+"
+    else:
+      return linePart.pattern
+
   def __getRegex(self, checkLine):
-    return "".join(map(lambda x: "(" + x.pattern + ")", checkLine.lineParts))
+    return "".join(map(lambda x: "(" + self.__getPartPattern(x) + ")", checkLine.lineParts))
 
   def __tryParse(self, string):
     return checker.CheckLine(string)
@@ -188,14 +194,17 @@
 
   def test_TextAndWhitespace(self):
     self.__matchSingle("foo", "foo")
-    self.__matchSingle("foo", "XfooX")
+    self.__matchSingle("foo", "  foo  ")
     self.__matchSingle("foo", "foo bar")
+    self.__notMatchSingle("foo", "XfooX")
     self.__notMatchSingle("foo", "zoo")
 
     self.__matchSingle("foo bar", "foo   bar")
     self.__matchSingle("foo bar", "abc foo bar def")
     self.__matchSingle("foo bar", "foo foo bar bar")
-    self.__notMatchSingle("foo bar", "foo abc bar")
+
+    self.__matchSingle("foo bar", "foo X bar")
+    self.__notMatchSingle("foo bar", "foo Xbar")
 
   def test_Pattern(self):
     self.__matchSingle("foo{{A|B}}bar", "fooAbar")