| # Copyright 2015-2017 ARM Limited |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| """Base class to parse trace.dat dumps""" |
| |
| import re |
| import pandas as pd |
| import warnings |
| |
| from resource import getrusage, RUSAGE_SELF |
| |
| def _get_free_memory_kb(): |
| try: |
| with open("/proc/meminfo") as f: |
| memfree_line = [l for l in f.readlines() if "MemFree" in l][0] |
| _, num_kb, _ = memfree_line.split() |
| return int(num_kb) |
| except: |
| # Probably either not running on Linux (no /proc/meminfo), or format has |
| # changed (we didn't find num_kb). |
| return None |
| |
def trace_parser_explode_array(string, array_lengths):
    """Explode an array in the trace into individual elements for easy parsing

    Basically, turn :code:`load={1 1 2 2}` into :code:`load0=1 load1=1 load2=2
    load3=2`.

    :param string: Input string from the trace
    :type string: str

    :param array_lengths: A dictionary of array names and their
        expected length. If we get an array that's shorter than the
        expected length, additional keys are introduced with value 0 to
        compensate. Arrays whose name is not in the dictionary are
        exploded to their own length.
    :type array_lengths: dict

    For example:
    ::

        trace_parser_explode_array(string="load={1 2}",
                                   array_lengths={"load": 4})
        "load0=1 load1=2 load2=0 load3=0"
    """

    while True:
        match = re.search(r"[^ ]+={[^}]+}", string)
        if match is None:
            break

        to_explode = match.group()
        col_basename = re.match(r"([^=]+)=", to_explode).groups()[0]
        vals_str = re.search(r"{(.+)}", to_explode).groups()[0]
        vals_array = vals_str.split(' ')

        # Pad short arrays with "0" up to the expected length.  Using
        # .get() instead of [] means an array that was never measured
        # (absent from array_lengths) is exploded as-is instead of
        # raising KeyError.
        expected_len = array_lengths.get(col_basename, 0)
        vals_array.extend("0" for _ in range(len(vals_array), expected_len))

        # Build "name0=v0 name1=v1 ..." in one pass; join avoids the
        # quadratic += string building and the trailing-space trim.
        exploded_str = " ".join("{}{}={}".format(col_basename, idx, val)
                                for (idx, val) in enumerate(vals_array))

        string = string[:match.start()] + exploded_str + string[match.end():]

    return string
| |
| class Base(object): |
| """Base class to parse trace.dat dumps. |
| |
| Don't use directly, create a subclass that has a unique_word class |
| variable. unique_word is a string that can uniquely identify |
| lines in the trace that correspond to this event. This is usually |
    the trace_name (optionally followed by a semicolon,
| e.g. "sched_switch:") but it can be anything else for trace points |
| generated using trace_printk(). |
| |
| :param parse_raw: If :code:`True`, raw trace data (-r option) to |
| trace-cmd will be used |
| |
| :param fallback: If :code:`True`, the parsing class will be used |
| only if no other candidate class's unique_word matched. subclasses |
| should override this (for ex. TracingMarkWrite uses it) |
| |
| This class acts as a base class for all TRAPpy events |
| |
| """ |
| def __init__(self, parse_raw=False, fallback=False): |
| self.fallback = fallback |
| self.tracer = None |
| self.data_frame = pd.DataFrame() |
| self.line_array = [] |
| self.data_array = [] |
| self.time_array = [] |
| self.comm_array = [] |
| self.pid_array = [] |
| self.tgid_array = [] |
| self.cpu_array = [] |
| self.parse_raw = parse_raw |
| self.cached = False |
| |
    def finalize_object(self):
        # Hook invoked after the DataFrame has been built; the base
        # implementation does nothing.  Subclasses can override this to
        # post-process their parsed data.
        pass
| |
| def __get_trace_array_lengths(self): |
| """Calculate the lengths of all arrays in the trace |
| |
| Returns a dict with the name of each array found in the trace |
| as keys and their corresponding length as value |
| |
| """ |
| from collections import defaultdict |
| |
| pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}") |
| |
| ret = defaultdict(int) |
| |
| for line in self.data_array: |
| while True: |
| match = re.search(pat_array, line) |
| if not match: |
| break |
| |
| (array_name, array_elements) = match.groups() |
| |
| array_len = len(array_elements.split(' ')) |
| |
| if array_len > ret[array_name]: |
| ret[array_name] = array_len |
| |
| line = line[match.end():] |
| |
| # Stop scanning if the trace doesn't have arrays |
| if len(ret) == 0: |
| break |
| |
| return ret |
| |
| def append_data(self, time, comm, pid, tgid, cpu, line, data): |
| """Append data parsed from a line to the corresponding arrays |
| |
| The :mod:`DataFrame` will be created from this when the whole trace |
| has been parsed. |
| |
| :param time: The time for the line that was printed in the trace |
| :type time: float |
| |
| :param comm: The command name or the execname from which the trace |
| line originated |
| :type comm: str |
| |
| :param pid: The PID of the process from which the trace |
| line originated |
| :type pid: int |
| |
| :param data: The data for matching line in the trace |
| :type data: str |
| """ |
| |
| self.time_array.append(time) |
| self.comm_array.append(comm) |
| self.pid_array.append(pid) |
| self.tgid_array.append(tgid) |
| self.cpu_array.append(cpu) |
| self.line_array.append(line) |
| self.data_array.append(data) |
| |
| def string_cast(self, string, type): |
| """ Attempt to convert string to another type |
| |
| Here we attempt to cast string to a type. Currently only |
| integer conversion is supported with future expansion |
| left open to other types. |
| |
| :param string: The value to convert. |
| :type string: str |
| |
| :param type: The type to convert to. |
| :type type: type |
| """ |
| # Currently this function only supports int conversion |
| if type != int: |
| return |
| # Handle false-positives for negative numbers |
| if not string.lstrip("-").isdigit(): |
| return string |
| return int(string) |
| |
| def generate_data_dict(self, data_str): |
| data_dict = {} |
| prev_key = None |
| for field in data_str.split(): |
| if "=" not in field: |
| # Concatenation is supported only for "string" values |
| if type(data_dict[prev_key]) is not str: |
| continue |
| data_dict[prev_key] += ' ' + field |
| continue |
| (key, value) = field.split('=', 1) |
| value = self.string_cast(value, int) |
| data_dict[key] = value |
| prev_key = key |
| return data_dict |
| |
| def generate_parsed_data(self): |
| |
| # Get a rough idea of how much memory we have to play with |
| CHECK_MEM_COUNT = 10000 |
| kb_free = _get_free_memory_kb() |
| starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss |
| check_memory_usage = True |
| check_memory_count = 1 |
| |
| for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array, |
| self.tgid_array, self.cpu_array, |
| self.line_array, self.data_array): |
| data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line} |
| data_dict.update(self.generate_data_dict(data_str)) |
| |
| # When running out of memory, Pandas has been observed to segfault |
| # rather than throwing a proper Python error. |
| # Look at how much memory our process is using and warn if we seem |
| # to be getting close to the system's limit, check it only once |
| # in the beginning and then every CHECK_MEM_COUNT events |
| check_memory_count -= 1 |
| if check_memory_usage and check_memory_count == 0: |
| kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss) |
| if kb_free and kb_used > kb_free * 0.9: |
| warnings.warn("TRAPpy: Appear to be low on memory. " |
| "If errors arise, try providing more RAM") |
| check_memory_usage = False |
| check_memory_count = CHECK_MEM_COUNT |
| |
| yield data_dict |
| |
| def create_dataframe(self): |
| """Create the final :mod:`pandas.DataFrame`""" |
| if not self.time_array: |
| return |
| |
| trace_arr_lengths = self.__get_trace_array_lengths() |
| |
| if trace_arr_lengths.items(): |
| for (idx, val) in enumerate(self.data_array): |
| expl_val = trace_parser_explode_array(val, trace_arr_lengths) |
| self.data_array[idx] = expl_val |
| |
| time_idx = pd.Index(self.time_array, name="Time") |
| self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx) |
| |
| self.time_array = [] |
| self.line_array = [] |
| self.comm_array = [] |
| self.pid_array = [] |
| self.cpu_array = [] |
| self.data_array = [] |
| |
    def write_csv(self, fname):
        """Write the contents of the :mod:`pandas.DataFrame` to a CSV file

        :param fname: The name of the CSV file
        :type fname: str
        """
        self.data_frame.to_csv(fname)
| |
| def read_csv(self, fname): |
| """Read the csv data into a DataFrame |
| |
| :param fname: The name of the CSV file |
| :type fname: str |
| """ |
| self.data_frame = pd.read_csv(fname, index_col = 0) |
| |
| def normalize_time(self, basetime): |
| """Substract basetime from the Time of the data frame |
| |
| :param basetime: The offset which needs to be subtracted from |
| the time index |
| :type basetime: float |
| """ |
| if basetime and not self.data_frame.empty: |
| self.data_frame.reset_index(inplace=True) |
| self.data_frame["Time"] = self.data_frame["Time"] - basetime |
| self.data_frame.set_index("Time", inplace=True) |