rebaseline_server: add "prefetch" directive that just warms the cache without awaiting results

This will allow the buildbots to warn the production rebaseline_server: "I just
generated some new results; here's a comparison that a human might ask for
soon. Download whatever images and generate whatever diffs you would need to
provide those results."

BUG=skia:1942
NOTREECHECKS=true
NOTRY=true
R=rmistry@google.com

Author: epoger@google.com

Review URL: https://codereview.chromium.org/443013002
diff --git a/gm/rebaseline_server/compare_rendered_pictures.py b/gm/rebaseline_server/compare_rendered_pictures.py
index ed36c7c..4f8f6cd 100755
--- a/gm/rebaseline_server/compare_rendered_pictures.py
+++ b/gm/rebaseline_server/compare_rendered_pictures.py
@@ -70,8 +70,15 @@
                image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL,
                diff_base_url=None, setA_label='setA',
                setB_label='setB', gs=None,
-               truncate_results=False):
-    """
+               truncate_results=False, prefetch_only=False,
+               download_all_images=False):
+    """Constructor: downloads images and generates diffs.
+
+    Once the object has been created (which may take a while), you can call its
+    get_packaged_results_of_type() method to quickly retrieve the results...
+    unless you have set prefetch_only to True, in which case we will
+    asynchronously warm up the ImageDiffDB cache but not fill in self._results.
+
     Args:
       setA_dirs: list of root directories to copy all JSON summaries from,
           and to use as setA within the comparisons. These directories may be
@@ -93,6 +100,14 @@
       gs: instance of GSUtils object we can use to download summary files
       truncate_results: FOR MANUAL TESTING: if True, truncate the set of images
           we process, to speed up testing.
+      prefetch_only: if True, return the new object as quickly as possible
+          with empty self._results (just queue up all the files to process,
+          don't wait around for them to be processed and recorded); otherwise,
+          block until the results have been assembled and recorded in
+          self._results.
+      download_all_images: if True, download all images, even if we don't
+          need them to generate diffs.  This will take much longer to complete,
+          but is useful for warming up the bitmap cache on local disk.
     """
     super(RenderedPicturesComparisons, self).__init__()
     self._image_diff_db = image_diff_db
@@ -104,6 +119,8 @@
     self._setB_label = setB_label
     self._gs = gs
     self.truncate_results = truncate_results
+    self._prefetch_only = prefetch_only
+    self._download_all_images = download_all_images
 
     tempdir = tempfile.mkdtemp()
     try:
@@ -121,11 +138,12 @@
       self._results = self._load_result_pairs(
           setA_root=setA_root, setA_section=gm_json.JSONKEY_ACTUALRESULTS,
           setB_root=setB_root, setB_section=gm_json.JSONKEY_ACTUALRESULTS)
-      self._timestamp = int(time.time())
-      logging.info('Number of download file collisions: %s' %
-                   imagediffdb.global_file_collisions)
-      logging.info('Results complete; took %d seconds.' %
-                   (self._timestamp - time_start))
+      if self._results:
+        self._timestamp = int(time.time())
+        logging.info('Number of download file collisions: %s' %
+                     imagediffdb.global_file_collisions)
+        logging.info('Results complete; took %d seconds.' %
+                     (self._timestamp - time_start))
     finally:
       shutil.rmtree(tempdir)
 
@@ -141,7 +159,8 @@
       setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
           gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB
 
-    Returns the summary of all image diff results.
+    Returns the summary of all image diff results (or None, depending on
+    self._prefetch_only).
     """
     logging.info('Reading JSON image summaries from dirs %s and %s...' % (
         setA_root, setB_root))
@@ -182,8 +201,9 @@
     dict_num = 0
     for dict_path in union_dict_paths:
       dict_num += 1
-      logging.info('Generating pixel diffs for dict #%d of %d, "%s"...' %
-                   (dict_num, num_union_dict_paths, dict_path))
+      logging.info(
+          'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...' %
+          (dict_num, num_union_dict_paths, dict_path))
 
       dictA = self.get_default(setA_dicts, None, dict_path)
       self._validate_dict_version(dictA)
@@ -235,12 +255,15 @@
             if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
               failing_image_pairs.add_image_pair(one_imagepair)
 
-    return {
-      results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
-          column_ids_in_order=ORDERED_COLUMN_IDS),
-      results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
-          column_ids_in_order=ORDERED_COLUMN_IDS),
-    }
+    if self._prefetch_only:
+      return None
+    else:
+      return {
+          results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
+              column_ids_in_order=ORDERED_COLUMN_IDS),
+          results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
+              column_ids_in_order=ORDERED_COLUMN_IDS),
+      }
 
   def _validate_dict_version(self, result_dict):
     """Raises Exception if the dict is not the type/version we know how to read.
@@ -320,7 +343,8 @@
           base_url=self._image_base_gs_url,
           imageA_relative_url=imageA_relative_url,
           imageB_relative_url=imageB_relative_url,
-          extra_columns=extra_columns_dict)
+          extra_columns=extra_columns_dict,
+          download_all_images=self._download_all_images)
     except (KeyError, TypeError):
       logging.exception(
           'got exception while creating ImagePair for'
diff --git a/gm/rebaseline_server/imagediffdb.py b/gm/rebaseline_server/imagediffdb.py
index fbe7121..1f93940 100644
--- a/gm/rebaseline_server/imagediffdb.py
+++ b/gm/rebaseline_server/imagediffdb.py
@@ -101,30 +101,29 @@
     actual_image_file = os.path.join(
         storage_root, actual_images_subdir,
         str(actual_image_locator) + image_suffix)
-    try:
-      _download_file(gs, expected_image_file, expected_image_url)
-    except Exception:
-      logging.exception('unable to download expected_image_url %s to file %s' %
-                        (expected_image_url, expected_image_file))
-      raise
-    try:
-      _download_file(gs, actual_image_file, actual_image_url)
-    except Exception:
-      logging.exception('unable to download actual_image_url %s to file %s' %
-                        (actual_image_url, actual_image_file))
-      raise
+    for image_file, image_url in [
+        (expected_image_file, expected_image_url),
+        (actual_image_file, actual_image_url)]:
+      if image_file and image_url:
+        try:
+          _download_file(gs, image_file, image_url)
+        except Exception:
+          logging.exception('unable to download image_url %s to file %s' %
+                            (image_url, image_file))
+          raise
 
-    # Get all diff images and values from skpdiff binary.
+    # Return early if we do not need to generate diffs.
+    if (expected_image_url == actual_image_url or
+        not expected_image_url or not actual_image_url):
+      return
+
+    # Get all diff images and values using the skpdiff binary.
     skpdiff_output_dir = tempfile.mkdtemp()
     try:
       skpdiff_summary_file = os.path.join(skpdiff_output_dir,
                                           'skpdiff-output.json')
       skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff')
       skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff')
-      expected_img = os.path.join(storage_root, expected_images_subdir,
-                                  str(expected_image_locator) + image_suffix)
-      actual_img = os.path.join(storage_root, actual_images_subdir,
-                                str(actual_image_locator) + image_suffix)
 
       # TODO(epoger): Consider calling skpdiff ONCE for all image pairs,
       # instead of calling it separately for each image pair.
@@ -133,7 +132,7 @@
       # Con: we would have to wait until all image pairs were loaded before
       # generating any of the diffs?
       find_run_binary.run_command(
-          [SKPDIFF_BINARY, '-p', expected_img, actual_img,
+          [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
            '--jsonp', 'false',
            '--output', skpdiff_summary_file,
            '--differs', 'perceptual', 'different_pixels',
@@ -261,7 +260,9 @@
 
     # Set up the queue for asynchronously loading DiffRecords, and start the
     # worker threads reading from it.
-    self._tasks_queue = Queue.Queue(maxsize=2*num_worker_threads)
+    # The queue maxsize must be 0 (infinite size queue), so that asynchronous
+    # calls can return as soon as possible.
+    self._tasks_queue = Queue.Queue(maxsize=0)
     self._workers = []
     for i in range(num_worker_threads):
       worker = threading.Thread(target=self.worker, args=(i,))
@@ -310,6 +311,9 @@
     If we already have a DiffRecord for this particular image pair, no work
     will be done.
 
+    If expected_image_url (or its locator) is None, just download actual_image.
+    If actual_image_url (or its locator) is None, just download expected_image.
+
     Args:
       expected_image_url: file, GS, or HTTP url from which we will download the
           expected image
@@ -435,10 +439,14 @@
   characters will have special meaning in filenames).
 
   Args:
-    locator: string, or something that can be represented as a string
+    locator: string, or something that can be represented as a string.
+        If None or '', it is returned without modification, because empty
+        locators have a particular meaning ("there is no image for this")
   """
-  return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
-
+  if locator:
+    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
+  else:
+    return locator
 
 def _get_difference_locator(expected_image_locator, actual_image_locator):
   """Returns the locator string used to look up the diffs between expected_image
diff --git a/gm/rebaseline_server/imagepair.py b/gm/rebaseline_server/imagepair.py
index 12d6718..d9c4cb8 100644
--- a/gm/rebaseline_server/imagepair.py
+++ b/gm/rebaseline_server/imagepair.py
@@ -32,7 +32,8 @@
 
   def __init__(self, image_diff_db,
                base_url, imageA_relative_url, imageB_relative_url,
-               expectations=None, extra_columns=None):
+               expectations=None, extra_columns=None,
+               download_all_images=False):
     """
     Args:
       image_diff_db: ImageDiffDB instance we use to generate/store image diffs
@@ -45,6 +46,9 @@
           metadata (ignore-failure, bug numbers, etc.)
       extra_columns: optional dictionary containing more metadata (test name,
           builder name, etc.)
+      download_all_images: if True, download any images associated with this
+          image pair, even if we don't need them to generate diffs
+          (imageA == imageB, or one of them is missing)
     """
     self._image_diff_db = image_diff_db
     self.base_url = base_url
@@ -63,11 +67,13 @@
       # Later on, we will call image_diff_db.get_diff_record() to find it.
       self._is_different = True
       self._diff_record = _DIFF_RECORD_STILL_LOADING
+
+    if self._diff_record != None or download_all_images:
       image_diff_db.add_image_pair(
           expected_image_locator=imageA_relative_url,
-          expected_image_url=posixpath.join(base_url, imageA_relative_url),
+          expected_image_url=self.posixpath_join(base_url, imageA_relative_url),
           actual_image_locator=imageB_relative_url,
-          actual_image_url=posixpath.join(base_url, imageB_relative_url))
+          actual_image_url=self.posixpath_join(base_url, imageB_relative_url))
 
   def as_dict(self):
     """Returns a dictionary describing this ImagePair.
@@ -97,3 +103,14 @@
     if self._diff_record != None:
       asdict[KEY__IMAGEPAIRS__DIFFERENCES] = self._diff_record.as_dict()
     return asdict
+
+  @staticmethod
+  def posixpath_join(*args):
+    """Wrapper around posixpath.join().
+
+    Returns posixpath.join(*args), or None if any arg is None.
+    """
+    for arg in args:
+      if arg == None:
+        return None
+    return posixpath.join(*args)
diff --git a/gm/rebaseline_server/imagepair_test.py b/gm/rebaseline_server/imagepair_test.py
index ef7695a..5a9f424 100755
--- a/gm/rebaseline_server/imagepair_test.py
+++ b/gm/rebaseline_server/imagepair_test.py
@@ -161,6 +161,23 @@
                 'isDifferent': True,
             },
         ],
+
+        # One of the two images is missing, but download_all_images=True so we
+        # should download it anyway.
+        [
+            # inputs:
+            None,
+            'arcofzorro/13786535001616823825.png',
+            None,
+            None,
+            # expected output:
+            {
+                'imageAUrl': None,
+                'imageBUrl': 'arcofzorro/13786535001616823825.png',
+                'isDifferent': True,
+            },
+        ],
+
     ]
 
     db = imagediffdb.ImageDiffDB(self.temp_dir)
@@ -171,7 +188,8 @@
           imageA_relative_url=selftest[0],
           imageB_relative_url=selftest[1],
           expectations=selftest[2],
-          extra_columns=selftest[3])
+          extra_columns=selftest[3],
+          download_all_images=True)
       self.assertEqual(image_pair.as_dict(), selftest[4])
 
 
diff --git a/gm/rebaseline_server/server.py b/gm/rebaseline_server/server.py
index 6271f97..03bbe74 100755
--- a/gm/rebaseline_server/server.py
+++ b/gm/rebaseline_server/server.py
@@ -73,12 +73,7 @@
 
 PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
 TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
-# Directory, relative to PARENT_DIRECTORY, within which the server will serve
-# out image diff data from within the precomputed _SERVER.results .
-PRECOMPUTED_RESULTS_SUBDIR = 'results'
-# Directory, relative to PARENT_DIRECTORY, within which the server will serve
-# out live-generated image diff data.
-LIVE_RESULTS_SUBDIR = 'live-results'
+
 # Directory, relative to PARENT_DIRECTORY, within which the server will serve
 # out static files.
 STATIC_CONTENTS_SUBDIR = 'static'
@@ -87,10 +82,19 @@
 GENERATED_IMAGES_SUBDIR = 'generated-images'
 GENERATED_JSON_SUBDIR = 'generated-json'
 
+# Directives associated with various HTTP GET requests.
+GET__LIVE_RESULTS = 'live-results'
+GET__PRECOMPUTED_RESULTS = 'results'
+GET__PREFETCH_RESULTS = 'prefetch'
+GET__STATIC_CONTENTS = 'static'
+
 # Parameters we use within do_GET_live_results()
 LIVE_PARAM__SET_A_DIR = 'setADir'
 LIVE_PARAM__SET_B_DIR = 'setBDir'
 
+# Parameters we use within do_GET_prefetch_results()
+PREFETCH_PARAM__DOWNLOAD_ONLY_DIFFERING = 'downloadOnlyDifferingImages'
+
 # How often (in seconds) clients should reload while waiting for initial
 # results to load.
 RELOAD_INTERVAL_UNTIL_READY = 10
@@ -170,11 +174,11 @@
       file_handle.write('<li>Expectations vs Actuals</li><ul>')
       for summary_type in SUMMARY_TYPES:
         file_handle.write(
-            '<li><a href="/{static_subdir}/view.html#/view.html?'
-            'resultsToLoad=/{results_subdir}/{summary_type}">'
+            '<li><a href="/{static_directive}/view.html#/view.html?'
+            'resultsToLoad=/{results_directive}/{summary_type}">'
             '{summary_type}</a></li>'.format(
-                results_subdir=PRECOMPUTED_RESULTS_SUBDIR,
-                static_subdir=STATIC_CONTENTS_SUBDIR,
+                results_directive=GET__PRECOMPUTED_RESULTS,
+                static_directive=GET__STATIC_CONTENTS,
                 summary_type=summary_type))
       file_handle.write('</ul>')
     if config_pairs:
@@ -185,7 +189,7 @@
           file_handle.write(
               ' <a href="/%s/view.html#/view.html?'
               'resultsToLoad=/%s/%s/%s-vs-%s_%s.json">%s</a>' % (
-                  STATIC_CONTENTS_SUBDIR, STATIC_CONTENTS_SUBDIR,
+                  GET__STATIC_CONTENTS, GET__STATIC_CONTENTS,
                   GENERATED_JSON_SUBDIR, config_pair[0], config_pair[1],
                   summary_type, summary_type))
         file_handle.write('</li>')
@@ -461,10 +465,10 @@
       logging.debug('do_GET: path="%s"' % self.path)
       if self.path == '' or self.path == '/' or self.path == '/index.html' :
         self.redirect_to('/%s/%s/index.html' % (
-            STATIC_CONTENTS_SUBDIR, GENERATED_HTML_SUBDIR))
+            GET__STATIC_CONTENTS, GENERATED_HTML_SUBDIR))
         return
       if self.path == '/favicon.ico' :
-        self.redirect_to('/%s/favicon.ico' % STATIC_CONTENTS_SUBDIR)
+        self.redirect_to('/%s/favicon.ico' % GET__STATIC_CONTENTS)
         return
 
       # All requests must be of this form:
@@ -474,9 +478,10 @@
       normpath = posixpath.normpath(self.path)
       (dispatcher_name, remainder) = PATHSPLIT_RE.match(normpath).groups()
       dispatchers = {
-          PRECOMPUTED_RESULTS_SUBDIR: self.do_GET_precomputed_results,
-          LIVE_RESULTS_SUBDIR: self.do_GET_live_results,
-          STATIC_CONTENTS_SUBDIR: self.do_GET_static,
+          GET__LIVE_RESULTS: self.do_GET_live_results,
+          GET__PRECOMPUTED_RESULTS: self.do_GET_precomputed_results,
+          GET__PREFETCH_RESULTS: self.do_GET_prefetch_results,
+          GET__STATIC_CONTENTS: self.do_GET_static,
       }
       dispatcher = dispatchers[dispatcher_name]
       dispatcher(remainder)
@@ -537,6 +542,26 @@
     self.send_json_dict(results_obj.get_packaged_results_of_type(
         results_mod.KEY__HEADER__RESULTS_ALL))
 
+  def do_GET_prefetch_results(self, url_remainder):
+    """ Prefetch image diff data for a future do_GET_live_results() call.
+
+    Args:
+      url_remainder: string indicating which image diffs to generate
+    """
+    logging.debug('do_GET_prefetch_results: url_remainder="%s"' % url_remainder)
+    param_dict = urlparse.parse_qs(url_remainder)
+    download_all_images = (
+        param_dict.get(PREFETCH_PARAM__DOWNLOAD_ONLY_DIFFERING, [''])[0].lower()
+        not in ['1', 'true'])
+    compare_rendered_pictures.RenderedPicturesComparisons(
+        setA_dirs=param_dict[LIVE_PARAM__SET_A_DIR],
+        setB_dirs=param_dict[LIVE_PARAM__SET_B_DIR],
+        image_diff_db=_SERVER.image_diff_db,
+        diff_base_url='/static/generated-images',
+        gs=_SERVER.gs, truncate_results=_SERVER.truncate_results,
+        prefetch_only=True, download_all_images=download_all_images)
+    self.send_response(200)
+
   def do_GET_static(self, path):
     """ Handle a GET request for a file under STATIC_CONTENTS_SUBDIR .
     Only allow serving of files within STATIC_CONTENTS_SUBDIR that is a