| #!/usr/bin/env python3 |
| # |
| # Copyright 2018 - The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Processes profiling data to output latency numbers.""" |
| # |
| # Type "python latency.py -h" for help |
| # |
| # Currently the log data is assumed to be in the following format: |
| # PROF:<event-id> <timestamp> |
# The <event-id> and <timestamp> can be any valid (positive) integer
# literal in Python.
| # Examples: |
| # PROF:0x0006 0x00000155e0d043f1 |
| # PROF:6 1468355593201 |
| |
| import argparse |
| from collections import defaultdict |
| import csv |
| import logging |
| import math |
| import os |
| import string |
| import xml.etree.ElementTree as ET |
| |
# Characters allowed in generated output file names; any other character in
# an event-pair name is replaced with '_' (see parse_xml).
valid_fname_chars = '-_.()%s%s' % (string.ascii_letters, string.digits)
# Step size (in percent) for the percentile table.
# NOTE(review): appears unused -- write_summary hard-codes range(1, 101);
# confirm before relying on it.
PERCENTILE_STEP = 1
# Marker preceding "<event-id> <timestamp>" entries in the input log.
PROFILER_DATA_PREFIX = 'PROF:'
| |
| |
class EventPair(object):
    """A start/end event pair whose latency is measured.

    The latency attribute doubles as scratch space: compute_latencies stores
    the pending start-event timestamp in it (0 means no start event pending).
    """

    def __init__(self, pair_id, latency, name):
        self.pair_id, self.latency, self.name = pair_id, latency, name
| |
| |
class LatencyEntry(object):
    """One latency sample: the start-event timestamp and the measured latency."""

    def __init__(self, start_timestamp, latency):
        self.start_timestamp, self.latency = start_timestamp, latency
| |
| |
def parse_xml(xml_file):
    """
    Parse the configuration xml file.

    Arguments:
      xml_file: path or file object of the configuration XML.
    Returns:
      event_pairs_by_pair_id: dict mapping event id to event pair object
      event_pairs_by_start_id: dict mapping starting event to list of event
        pairs with that starting event.
      event_pairs_by_end_id: dict mapping ending event to list of event pairs
        with that ending event.
    """
    root = ET.parse(xml_file).getroot()
    event_pairs_by_pair_id = {}
    event_pairs_by_start_id = defaultdict(list)
    event_pairs_by_end_id = defaultdict(list)

    for pair_elem in root.findall('event-pair'):
        start_elem = root.find(
            "./event[@id='{0:}']".format(pair_elem.attrib['start-event']))
        end_elem = root.find(
            "./event[@id='{0:}']".format(pair_elem.attrib['end-event']))
        start_id = int(start_elem.attrib['id'], 0)
        end_id = int(end_elem.attrib['id'], 0)
        # A pair is keyed by packing both 32-bit event ids into one integer.
        pair_id = start_id << 32 | end_id
        if pair_id in event_pairs_by_pair_id:
            logging.error('Latency event repeated: start id = %d, end id = %d',
                          start_id, end_id)
            continue
        # Output file name base: start event name + "_to_" + end event name,
        # with every character outside valid_fname_chars replaced by '_'.
        raw_name = start_elem.attrib['name'] + '_to_' + end_elem.attrib['name']
        safe_name = ''.join(
            c if c in valid_fname_chars else '_' for c in raw_name)
        pair = EventPair(pair_id, 0, safe_name)
        event_pairs_by_pair_id[pair_id] = pair
        event_pairs_by_start_id[start_id].append(pair)
        event_pairs_by_end_id[end_id].append(pair)
    return (event_pairs_by_pair_id, event_pairs_by_start_id,
            event_pairs_by_end_id)
| |
| |
def percentile_to_index(num_entries, percentile):
    """
    Returns the index in an array corresponding to a percentile.

    Arguments:
      num_entries: the number of entries in the array.
      percentile: which percentile to calculate the index for.
    Returns:
      The index in the array corresponding to the percentile, clamped at 0.
    """
    raw = int(math.floor(float(num_entries) * percentile / 100))
    # Shift to a 0-based index, never going below the first element.
    return max(raw - 1, 0)
| |
| |
def compute_latencies(input_file, event_pairs_by_start_id,
                      event_pairs_by_end_id):
    """Parse the input data file and compute latencies.

    Arguments:
      input_file: open text file whose lines may contain
        'PROF:<event-id> <timestamp>' entries.
      event_pairs_by_start_id: dict mapping start event id to EventPair list.
      event_pairs_by_end_id: dict mapping end event id to EventPair list.
    Returns:
      dict mapping pair id to a list of LatencyEntry objects.
    """
    lat_tables_by_pair_id = defaultdict(list)
    for line_num, line in enumerate(input_file, start=1):
        payload = line.partition(PROFILER_DATA_PREFIX)[2]
        if not payload:
            # No profiler marker on this line; skip it.
            continue
        try:
            event_id, timestamp = [int(tok, 0) for tok in payload.split()]
        except ValueError:
            logging.error('Badly formed event entry at line #%s: %s', line_num,
                          line)
            continue
        # Stash the start timestamp in the pair's latency slot; it serves as
        # scratch space until the matching end event is seen.
        for pair in event_pairs_by_start_id[event_id]:
            pair.latency = timestamp
        for pair in event_pairs_by_end_id[event_id]:
            # Emit a latency only if the corresponding start event was
            # already seen (a zero slot means no start is pending).
            if not pair.latency:
                continue
            lat_tables_by_pair_id[pair.pair_id].append(
                LatencyEntry(pair.latency, timestamp - pair.latency))
            pair.latency = 0
    return lat_tables_by_pair_id
| |
| |
def write_data(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id):
    """Write the raw latency samples of each event pair to a CSV file.

    Arguments:
      fname_base: prefix for the output file names.
      event_pairs_by_pair_id: dict mapping pair id to EventPair.
      lat_tables_by_pair_id: dict mapping pair id to list of LatencyEntry.
    Writes one '<fname_base>_<pair name>_data.csv' file per event pair, with
    one (start timestamp, latency) row per sample.
    """
    for event_id, lat_table in lat_tables_by_pair_id.items():
        event_pair = event_pairs_by_pair_id[event_id]
        # Bug fix: csv.writer requires a text-mode file in Python 3; opening
        # with 'wb' raised TypeError on the first writerow.  newline='' is
        # the csv-module-recommended setting for output files.
        with open(fname_base + '_' + event_pair.name + '_data.csv',
                  'w', newline='') as out_file:
            csv_writer = csv.writer(out_file)
            for dat in lat_table:
                csv_writer.writerow([dat.start_timestamp, dat.latency])
| |
| |
def write_summary(fname_base, event_pairs_by_pair_id, lat_tables_by_pair_id):
    """Write a percentile table plus summary statistics per event pair.

    Arguments:
      fname_base: prefix for the output file names.
      event_pairs_by_pair_id: dict mapping pair id to EventPair.
      lat_tables_by_pair_id: dict mapping pair id to list of LatencyEntry.
    Writes one '<fname_base>_<pair name>_summary.txt' file per event pair.
    """
    summaries = get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id)
    # Only the keys are needed here; the latency values come from summaries.
    for event_id in lat_tables_by_pair_id:
        event_pair = event_pairs_by_pair_id[event_id]
        summary = summaries[event_pair.name]
        latencies = summary['latencies']
        num_latencies = summary['num_latencies']
        # Bug fix: the file must be opened in text mode -- with 'wb' both
        # csv.writer and print(file=...) raise TypeError under Python 3.
        with open(fname_base + '_' + event_pair.name + '_summary.txt',
                  'w', newline='') as out_file:
            csv_writer = csv.writer(out_file)
            csv_writer.writerow(['Percentile', 'Latency'])
            # Write percentile table
            for percentile in range(1, 101):
                ind = percentile_to_index(num_latencies, percentile)
                csv_writer.writerow([percentile, latencies[ind]])

            # Bug fix: historically get_summaries stored the median under
            # 'median' while this function read 'median_lat' (a KeyError).
            # Accept either key so both old and fixed summaries work.
            median = summary.get('median_lat', summary.get('median'))

            # Write summary
            print('\n\nTotal number of samples = {}'.format(num_latencies),
                  file=out_file)
            print('Min = {}'.format(summary['min_lat']), file=out_file)
            print('Max = {}'.format(summary['max_lat']), file=out_file)
            print('Average = {}'.format(summary['average_lat']), file=out_file)
            print('Median = {}'.format(median), file=out_file)
            print('90 %ile = {}'.format(summary['90pctile']), file=out_file)
            print('95 %ile = {}'.format(summary['95pctile']), file=out_file)
| |
| |
def process_latencies(config_xml, input_file):
    """
    End to end function to compute latencies and summaries from input file.
    Writes latency results to files in current directory.

    Arguments:
      config_xml: xml file specifying which event pairs to compute latency
        btwn.
      input_file: text file containing the timestamped events, like a log file.
    """
    # Load the event-pair configuration.
    pairs_by_id, pairs_by_start, pairs_by_end = parse_xml(config_xml)
    # Compute latency tables from the log.
    latency_tables = compute_latencies(input_file, pairs_by_start, pairs_by_end)
    # Output files are named after the input log's base name.
    base_name = os.path.splitext(os.path.basename(input_file.name))[0]
    # Emit the per-sample CSV data and the summary files.
    write_data(base_name, pairs_by_id, latency_tables)
    write_summary(base_name, pairs_by_id, latency_tables)
| |
| |
def get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id):
    """
    Process significant summaries from a table of latencies.

    Arguments:
      event_pairs_by_pair_id: dict mapping event id to event pair object
      lat_tables_by_pair_id: dict mapping event id to latency table
    Returns:
      summaries: dict mapping event pair name to significant summary metrics
        ('latencies', 'num_latencies', 'min_lat', 'max_lat', 'average_lat',
        'median_lat', 'median', '90pctile', '95pctile').
    """
    summaries = {}
    for event_id, lat_table in lat_tables_by_pair_id.items():
        event_summary = {}
        event_pair = event_pairs_by_pair_id[event_id]
        latencies = sorted(entry.latency for entry in lat_table)
        num_latencies = len(latencies)
        event_summary['latencies'] = latencies
        event_summary['num_latencies'] = num_latencies
        event_summary['min_lat'] = latencies[0]
        event_summary['max_lat'] = latencies[-1]
        event_summary['average_lat'] = sum(latencies) / num_latencies
        median = latencies[num_latencies // 2]
        # Bug fix: write_summary reads the 'median_lat' key, but it was only
        # stored under 'median' (causing a KeyError).  Store it under both
        # keys so existing consumers of 'median' keep working.
        event_summary['median_lat'] = median
        event_summary['median'] = median
        event_summary['90pctile'] = latencies[percentile_to_index(
            num_latencies, 90)]
        event_summary['95pctile'] = latencies[percentile_to_index(
            num_latencies, 95)]
        summaries[event_pair.name] = event_summary
    return summaries
| |
| |
def get_summaries_from_log(input_file_name, config_xml=None):
    """
    End to end function to compute latencies and summaries from input file.
    Returns a summary dictionary.

    Arguments:
      input_file_name: text file containing the timestamped events, like a
        log file.
      config_xml: xml file specifying which event pairs to compute latency
        btwn.  Defaults to latency.xml next to this script.
    Returns:
      summaries: dict mapping event pair name to significant summary metrics.
    """
    config_xml = config_xml or os.path.join(os.path.dirname(__file__),
                                            'latency.xml')
    (event_pairs_by_pair_id, event_pairs_by_start_id,
     event_pairs_by_end_id) = parse_xml(config_xml)
    # Compute latencies.
    # Bug fix: the log file was opened but never closed (resource leak);
    # a with-statement guarantees it is closed.
    with open(input_file_name, 'r') as input_file:
        lat_tables_by_pair_id = compute_latencies(input_file,
                                                  event_pairs_by_start_id,
                                                  event_pairs_by_end_id)
    return get_summaries(event_pairs_by_pair_id, lat_tables_by_pair_id)
| |
| |
if __name__ == '__main__':
    # Command-line entry point: parse arguments, then run the end-to-end
    # latency computation.
    arg_parser = argparse.ArgumentParser(
        description='Processes profiling data to output latency numbers')
    arg_parser.add_argument(
        '--events-config',
        type=argparse.FileType('r'),
        default=os.path.join(os.path.dirname(__file__), 'latency.xml'),
        help='The configuration XML file for events.'
        ' If not specified uses latency.xml from current folder')
    arg_parser.add_argument(
        'input', type=argparse.FileType('r'), help='The input log')
    cli_args = arg_parser.parse_args()
    process_latencies(cli_args.events_config, cli_args.input)