| #!/usr/bin/env python |
| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Parses CSV output from the loading_measurement and outputs interesting stats. |
| |
| Example usage: |
| $ tools/perf/run_measurement --browser=release \ |
| --output-format=csv --output=/path/to/loading_measurement_output.csv \ |
| loading_measurement tools/perf/page_sets/top_1m.py |
| $ tools/perf/measurements/loading_measurement_analyzer.py \ |
| --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \ |
| /path/to/loading_measurement_output.csv |
| """ |
| |
| import collections |
| import csv |
| import heapq |
| import optparse |
| import os |
| import re |
| import sys |
| |
| |
| class LoadingMeasurementAnalyzer(object): |
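  """Aggregates per-category timing stats from loading_measurement CSV output.

  Collects totals, maxes, and averages for each timing category, plus
  per-page load and CPU times, and prints a summary with the slowest URLs.
  """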
| |
| def __init__(self, input_file, options): |
    self.ranks = {}  # url -> rank
    # Each of these maps a category name to a list of (value, url) pairs.
    self.totals = collections.defaultdict(list)
    self.maxes = collections.defaultdict(list)
    self.avgs = collections.defaultdict(list)
    self.load_times = []  # (load time in ms, url)
    self.cpu_times = []  # (cpu time in ms, url)
    self.network_percents = []  # (network fraction of load time, url)
| self.num_rows_parsed = 0 |
| self.num_slowest_urls = options.num_slowest_urls |
    self._display_zeros = options.display_zeros
    if options.rank_csv_file:
      self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
    self._ParseInputFile(input_file, options)
| |
| def _ParseInputFile(self, input_file, options): |
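    """Accumulates per-category times from each row of the measurement CSV.

    Rows with non-positive load times are skipped, as are rows whose URL
    rank exceeds --rank-limit when one is given.
    """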
| with open(input_file, 'r') as csvfile: |
      reader = csv.DictReader(csvfile)
      for row in reader:
| if (options.rank_limit and |
| self._GetRank(row['url']) > options.rank_limit): |
| continue |
| cpu_time = 0 |
| load_time = float(row['load_time (ms)']) |
        if load_time <= 0:
          print 'Skipping %s due to non-positive load time' % row['url']
          continue
        for key, value in row.iteritems():
          # Skip the identifying columns and any empty, missing ('-'), or
          # zero values.
          if key in ('url', 'load_time (ms)', 'dom_content_loaded_time (ms)'):
            continue
          if not value or value == '-':
            continue
          value = float(value)
          if not value:
            continue
          # Bucket each category by its aggregation type; only plain totals
          # count toward the page's CPU time.
          if '_avg' in key:
            self.avgs[key].append((value, row['url']))
          elif '_max' in key:
            self.maxes[key].append((value, row['url']))
          else:
            self.totals[key].append((value, row['url']))
            cpu_time += value
| self.load_times.append((load_time, row['url'])) |
| self.cpu_times.append((cpu_time, row['url'])) |
        if options.show_network:
          # Approximate network time as whatever part of the load time is
          # not accounted for by the CPU categories.
          network_time = load_time - cpu_time
          self.totals['Network (ms)'].append((network_time, row['url']))
          self.network_percents.append((network_time / load_time, row['url']))
| self.num_rows_parsed += 1 |
        if options.max_rows and self.num_rows_parsed == options.max_rows:
| break |
| |
| def _ParseRankCsvFile(self, input_file): |
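    """Loads url -> rank mappings from a CSV of <rank,url> rows.

    Each row is expected to look like, e.g.:
        1,google.com
    """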
| with open(input_file, 'r') as csvfile: |
| for row in csv.reader(csvfile): |
        assert len(row) == 2, 'Expected <rank,url> rows'
| self.ranks[row[1]] = int(row[0]) |
| |
| def _GetRank(self, url): |
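    """Returns the rank of a URL, or len(self.ranks) if it is unranked."""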
| url = url.replace('http://', '') |
| if url in self.ranks: |
| return self.ranks[url] |
| return len(self.ranks) |
| |
| def PrintSummary(self, stdout): |
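    """Prints overall stats, per-category totals, and the slowest URLs."""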
    if not self.num_rows_parsed:
      print >> stdout, 'No rows parsed.'
      return
    sum_totals = {}
| units = None |
| for key, values in self.totals.iteritems(): |
| m = re.match('.* [(](.*)[)]', key) |
| assert m, 'All keys should have units.' |
| assert not units or units == m.group(1), 'All units should be the same.' |
| units = m.group(1) |
| sum_totals[key] = sum([v[0] for v in values]) |
| total_cpu_time = sum([v[0] for v in self.cpu_times]) |
| total_page_load_time = sum([v[0] for v in self.load_times]) |
| |
| print >> stdout |
| print >> stdout, 'Total URLs:', self.num_rows_parsed |
| print >> stdout, 'Total page load time: %ds' % int(round( |
| total_page_load_time / 1000)) |
| print >> stdout, 'Average page load time: %dms' % int(round( |
| total_page_load_time / self.num_rows_parsed)) |
| if units == 'ms': |
| print >> stdout, 'Total CPU time: %ds' % int(round(total_cpu_time / 1000)) |
| print >> stdout, 'Average CPU time: %dms' % int(round( |
| total_cpu_time / self.num_rows_parsed)) |
| print >> stdout |
    for key, value in sorted(sum_totals.iteritems(), reverse=True,
                             key=lambda i: i[1]):
      # Totals are sorted in descending order, so stop once they round down
      # to a negligible amount (unless --display-zeros was passed).
      if not self._display_zeros and not int(value / 100.):
        break
| output_key = '%60s: ' % re.sub(' [(].*[)]', '', key) |
| if units == 'ms': |
| output_value = '%10ds ' % (value / 1000) |
| output_percent = '%.1f%%' % (100 * value / total_page_load_time) |
| else: |
| output_value = '%10d%s ' % (value, units) |
| output_percent = '%.1f%%' % (100 * value / total_cpu_time) |
| print >> stdout, output_key, output_value, output_percent |
| |
| if not self.num_slowest_urls: |
| return |
| |
    for key, values in sorted(self.totals.iteritems(), reverse=True,
                              key=lambda i: sum_totals[i[0]]):
      # Apply the same negligible-total cutoff as above.
      if not self._display_zeros and not int(sum_totals[key] / 100.):
        break
| print >> stdout |
| print >> stdout, 'Top %d slowest %s:' % (self.num_slowest_urls, |
| re.sub(' [(].*[)]', '', key)) |
| slowest = heapq.nlargest(self.num_slowest_urls, values) |
| for value, url in slowest: |
| print >> stdout, '%10d%s\t%s (#%s)' % (value, units, url, |
| self._GetRank(url)) |
| |
| if self.network_percents: |
| print >> stdout |
| print >> stdout, 'Top %d highest network to CPU time ratios:' % ( |
| self.num_slowest_urls) |
| for percent, url in sorted( |
| self.network_percents, reverse=True)[:self.num_slowest_urls]: |
| percent *= 100 |
| print >> stdout, '\t', '%.1f%%' % percent, url, '(#%s)' % ( |
| self._GetRank(url)) |
| |
| |
| def main(arguments, stdout=sys.stdout): |
  prog_desc = 'Parses CSV output from the loading_measurement.'
  parser = optparse.OptionParser(
      usage='%prog [options] <loading_measurement_output.csv>\n\n' + prog_desc)
| |
| parser.add_option('--max-rows', type='int', |
| help='Only process this many rows') |
| parser.add_option('--num-slowest-urls', type='int', |
| help='Output this many slowest URLs for each category') |
| parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>') |
  parser.add_option('--rank-limit', type='int',
                    help='Only process pages whose rank is at most this value')
| parser.add_option('--show-network', action='store_true', |
| help='Whether to display Network as a category') |
| parser.add_option('--display-zeros', action='store_true', |
| help='Whether to display categories with zero time') |
| |
| options, args = parser.parse_args(arguments) |
| |
  if len(args) != 1:
    print 'Must pass exactly one CSV file to analyze'
    return 1
  if options.rank_limit and not options.rank_csv_file:
    print 'Must pass --rank-csv-file with --rank-limit'
    return 1
| |
| LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout) |
| |
| return 0 |
| |
| |
| if __name__ == '__main__': |
| sys.exit(main(sys.argv[1:])) |