[autotest] Log test result size to tko_job_keyvals

Get the result size information from the directory summaries and save the data
to the tko_job_keyvals table.
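
Conceptually, the parser derives four keyvals from the merged directory
summary and stores them with the job. The sketch below shows that mapping;
the key-name constants are the ones added to tko/parse.py in this change,
the byte counts are illustrative (chosen to match the TEST output further
down), and the integer division mirrors the Python 2 parser code:

# Key name constants as added to tko/parse.py by this change.
CLIENT_RESULT_COLLECTED_KB = 'client_result_collected_KB'
ORIGINAL_RESULT_TOTAL_KB = 'original_result_total_KB'
RESULT_UPLOADED_KB = 'result_uploaded_KB'
RESULT_THROTTLED = 'result_throttled'

# Values normally provided by result_utils.merge_summaries(); the numbers
# here are made up for illustration only.
client_collected_bytes = 583532544   # bytes copied off the DUT
original_bytes = 571116544           # result dir size before trimming
trimmed_bytes = 571116544            # result dir size after trimming

sizes = {
    CLIENT_RESULT_COLLECTED_KB: client_collected_bytes / 1024,
    ORIGINAL_RESULT_TOTAL_KB: original_bytes / 1024,
    RESULT_UPLOADED_KB: trimmed_bytes / 1024,
    RESULT_THROTTLED: original_bytes != trimmed_bytes,
}
# sizes is merged into job.keyval_dict and written to tko_job_keyvals.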

BUG=chromium:716218
TEST=local test run; verified the results are recorded in the tko table:
mysql> select * from tko_job_keyvals as t where job_id=714 AND
  t.key like "%result%";
+------+--------+----------------------------+--------+
| id   | job_id | key                        | value  |
+------+--------+----------------------------+--------+
| 8772 |    714 | result_uploaded_KB         | 557731 |
| 8773 |    714 | result_throttled           | 0      |
| 8781 |    714 | client_result_collected_KB | 569856 |
| 8783 |    714 | original_result_total_KB   | 557731 |
+------+--------+----------------------------+--------+

Change-Id: Ice56082086d3b48b885e5a0d64966ac9598b91e7
Reviewed-on: https://chromium-review.googlesource.com/513520
Commit-Ready: Dan Shi <dshi@google.com>
Tested-by: Dan Shi <dshi@google.com>
Reviewed-by: Dan Shi <dshi@google.com>
diff --git a/client/common_lib/file_utils.py b/client/common_lib/file_utils.py
index 73715ec..b9f2cc7 100644
--- a/client/common_lib/file_utils.py
+++ b/client/common_lib/file_utils.py
@@ -3,8 +3,10 @@
 # found in the LICENSE file.
 
 import errno
+import logging
 import os
 import shutil
+import subprocess
 import urllib2
 
 from autotest_lib.client.common_lib import global_config
@@ -170,3 +172,33 @@
             if not block:
                 break
             local_file.write(block)
+
+
+def get_directory_size_kibibytes_cmd_list(directory):
+    """Returns command to get a directory's total size."""
+    # Having this in its own method makes it easier to mock in
+    # unittests.
+    return ['du', '-sk', directory]
+
+
+def get_directory_size_kibibytes(directory):
+    """Calculate the total size of a directory with all its contents.
+
+    @param directory: Path to the directory
+
+    @return Size of the directory in kibibytes.
+    """
+    cmd = get_directory_size_kibibytes_cmd_list(directory)
+    process = subprocess.Popen(cmd,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+    stdout_data, stderr_data = process.communicate()
+
+    if process.returncode != 0:
+        # This function is used for statistics only, if it fails,
+        # nothing else should crash.
+        logging.warning('Getting size of %s failed. Stderr:', directory)
+        logging.warning(stderr_data)
+        return 0
+
+    return int(stdout_data.split('\t', 1)[0])
\ No newline at end of file
diff --git a/site_utils/gs_offloader.py b/site_utils/gs_offloader.py
index 02efc43..5a263e2 100755
--- a/site_utils/gs_offloader.py
+++ b/site_utils/gs_offloader.py
@@ -32,6 +32,7 @@
 
 import common
 from autotest_lib.client.common_lib import error
+from autotest_lib.client.common_lib import file_utils
 from autotest_lib.client.common_lib import global_config
 from autotest_lib.client.common_lib import utils
 from autotest_lib.client.common_lib.cros.graphite import autotest_es
@@ -238,36 +239,6 @@
     return cmd
 
 
-def get_directory_size_kibibytes_cmd_list(directory):
-    """Returns command to get a directory's total size."""
-    # Having this in its own method makes it easier to mock in
-    # unittests.
-    return ['du', '-sk', directory]
-
-
-def get_directory_size_kibibytes(directory):
-    """Calculate the total size of a directory with all its contents.
-
-    @param directory: Path to the directory
-
-    @return Size of the directory in kibibytes.
-    """
-    cmd = get_directory_size_kibibytes_cmd_list(directory)
-    process = subprocess.Popen(cmd,
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.PIPE)
-    stdout_data, stderr_data = process.communicate()
-
-    if process.returncode != 0:
-        # This function is used for statistics only, if it fails,
-        # nothing else should crash.
-        logging.warning('Getting size of %s failed. Stderr:', directory)
-        logging.warning(stderr_data)
-        return 0
-
-    return int(stdout_data.split('\t', 1)[0])
-
-
 def get_sanitized_name(name):
     """Get a string with all invalid characters in the name being replaced.
 
@@ -582,7 +553,7 @@
 
                 if LIMIT_FILE_COUNT:
                     limit_file_count(dir_entry)
-                dir_size = get_directory_size_kibibytes(dir_entry)
+                dir_size = file_utils.get_directory_size_kibibytes(dir_entry)
                 es_metadata['size_kb'] = dir_size
 
                 stdout_file = tempfile.TemporaryFile('w+')
diff --git a/tko/parse.py b/tko/parse.py
index 8679cf9..8d08a2a 100755
--- a/tko/parse.py
+++ b/tko/parse.py
@@ -13,6 +13,8 @@
 import traceback
 
 import common
+from autotest_lib.client.bin import result_utils
+from autotest_lib.client.common_lib import file_utils
 from autotest_lib.client.common_lib import global_config
 from autotest_lib.client.common_lib import mail, pidfile
 from autotest_lib.client.common_lib import utils
@@ -31,6 +33,25 @@
     'ParseOptions', ['reparse', 'mail_on_failure', 'dry_run', 'suite_report',
                      'datastore_creds', 'export_to_gcloud_path'])
 
+# Key names related to test result sizes to be stored in tko_job_keyvals.
+# These keyvals track the total size (in KB) of test results generated
+# during the test, including:
+#  * server side test logs and result files.
+#  * client side test logs, sysinfo, system logs and crash dumps.
+# Note that a test can collect the same result files from the DUT multiple
+# times during the test, e.g., before and after each iteration. So the value
+# of CLIENT_RESULT_COLLECTED_KB can be larger than the value of
+# RESULT_UPLOADED_KB, which is the size of the result directory on the server
+# side, even if test result throttling is not applied.
+# The total size (in KB) of test results collected from the test device.
+CLIENT_RESULT_COLLECTED_KB = 'client_result_collected_KB'
+# The original size (in KB) of test results before being trimmed.
+ORIGINAL_RESULT_TOTAL_KB = 'original_result_total_KB'
+# The total size (in KB) of test results to be uploaded by gs_offloader.
+RESULT_UPLOADED_KB = 'result_uploaded_KB'
+# Flag indicating whether test result collection was throttled.
+RESULT_THROTTLED = 'result_throttled'
+
 def parse_args():
     """Parse args."""
     # build up our options parser and parse sys.argv
@@ -232,6 +253,51 @@
     tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)
 
 
+def _get_result_sizes(path):
+    """Get the result size information.
+
+    It first tries to merge the directory summaries and calculate the result
+    sizes, including:
+    CLIENT_RESULT_COLLECTED_KB: The volume in KB transferred from the test
+            device.
+    ORIGINAL_RESULT_TOTAL_KB: The original size in KB of the result files
+            before being trimmed.
+    RESULT_UPLOADED_KB: The volume in KB that will be uploaded.
+    RESULT_THROTTLED: Whether the result files were throttled.
+
+    If merging the directory summaries fails for any reason, fall back to
+    using the total size of the given result directory.
+
+    @param path: Path of the result directory to get size information for.
+    @return: A dictionary of result size information.
+    """
+    sizes = {}
+    try:
+        client_collected_bytes, summary = result_utils.merge_summaries(path)
+        root_entry = summary[result_utils.ROOT_DIR]
+        sizes[CLIENT_RESULT_COLLECTED_KB] = client_collected_bytes / 1024
+        sizes[ORIGINAL_RESULT_TOTAL_KB] = (
+                root_entry[result_utils.ORIGINAL_SIZE_BYTES] / 1024)
+        sizes[RESULT_UPLOADED_KB] = (
+                root_entry[result_utils.TRIMMED_SIZE_BYTES] / 1024)
+        # Test results are considered throttled if the original size of the
+        # collected results differs from the trimmed size reported in the
+        # client-side directory summary.
+        sizes[RESULT_THROTTLED] = (
+                root_entry[result_utils.ORIGINAL_SIZE_BYTES] !=
+                root_entry[result_utils.TRIMMED_SIZE_BYTES])
+    except Exception:
+        tko_utils.dprint('Failed to calculate result sizes based on directory '
+                         'summaries. Falling back to recording the total '
+                         'size.\nException: %s' % traceback.format_exc())
+        total_size = file_utils.get_directory_size_kibibytes(path)
+        sizes[CLIENT_RESULT_COLLECTED_KB] = total_size
+        sizes[ORIGINAL_RESULT_TOTAL_KB] = total_size
+        sizes[RESULT_UPLOADED_KB] = total_size
+        sizes[RESULT_THROTTLED] = 0
+    return sizes
+
+
 def parse_one(db, jobname, path, parse_options):
     """Parse a single job. Optionally send email on failure.
 
@@ -331,6 +397,10 @@
         if sponge_url:
             job.keyval_dict['sponge_url'] = sponge_url
 
+    # Record the test result sizes in the job keyvals.
+    sizes = _get_result_sizes(path)
+    job.keyval_dict.update(sizes)
+
     # check for failures
     message_lines = [""]
     job_successful = True
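
For reference, the fallback path relies on the helper this change moves into
client/common_lib/file_utils.py, which simply wraps `du -sk`. The sketch below
shows how it might be exercised in isolation; it is illustrative only and not
part of this change, it assumes the `mock` library is available, and the
results path is made up:

import unittest

import common  # Sets up autotest import paths, as in the modules above.
import mock

from autotest_lib.client.common_lib import file_utils


class GetDirectorySizeKibibytesTest(unittest.TestCase):
    """Illustrative tests for the relocated `du -sk` wrapper."""

    def test_parses_du_output(self):
        # Fake a successful `du -sk <dir>` run: "<size>\t<path>" on stdout.
        fake_process = mock.Mock()
        fake_process.communicate.return_value = ('557731\t/results/714\n', '')
        fake_process.returncode = 0
        with mock.patch.object(file_utils.subprocess, 'Popen',
                               return_value=fake_process):
            self.assertEqual(
                    557731,
                    file_utils.get_directory_size_kibibytes('/results/714'))

    def test_returns_zero_on_failure(self):
        # A failing du is logged and reported as 0 rather than raised.
        fake_process = mock.Mock()
        fake_process.communicate.return_value = ('', 'du: cannot access')
        fake_process.returncode = 1
        with mock.patch.object(file_utils.subprocess, 'Popen',
                               return_value=fake_process):
            self.assertEqual(
                    0, file_utils.get_directory_size_kibibytes('/results/714'))


if __name__ == '__main__':
    unittest.main()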