# Copyright 2015-2017 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Base class to parse trace.dat dumps"""
import re
import warnings
from resource import getrusage, RUSAGE_SELF

import pandas as pd
def _get_free_memory_kb():
try:
with open("/proc/meminfo") as f:
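            # Expected format: "MemFree:        123456 kB"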
memfree_line = [l for l in f.readlines() if "MemFree" in l][0]
_, num_kb, _ = memfree_line.split()
return int(num_kb)
    except (IOError, OSError, IndexError, ValueError):
        # Probably either not running on Linux (no /proc/meminfo), or the
        # format has changed (we didn't find num_kb)
        return None
def trace_parser_explode_array(string, array_lengths):
"""Explode an array in the trace into individual elements for easy parsing
Basically, turn :code:`load={1 1 2 2}` into :code:`load0=1 load1=1 load2=2
load3=2`.
:param string: Input string from the trace
:type string: str
    :param array_lengths: A dictionary of array names and their
        expected lengths.  If we get an array that's shorter than the
        expected length, additional keys are introduced with value 0
        to compensate.
    :type array_lengths: dict
For example:
::
trace_parser_explode_array(string="load={1 2}",
array_lengths={"load": 4})
"load0=1 load1=2 load2=0 load3=0"
"""
while True:
match = re.search(r"[^ ]+={[^}]+}", string)
if match is None:
break
to_explode = match.group()
col_basename = re.match(r"([^=]+)=", to_explode).groups()[0]
vals_str = re.search(r"{(.+)}", to_explode).groups()[0]
vals_array = vals_str.split(' ')
exploded_str = ""
for (idx, val) in enumerate(vals_array):
exploded_str += "{}{}={} ".format(col_basename, idx, val)
vals_added = len(vals_array)
if vals_added < array_lengths[col_basename]:
for idx in range(vals_added, array_lengths[col_basename]):
exploded_str += "{}{}=0 ".format(col_basename, idx)
exploded_str = exploded_str[:-1]
begin_idx = match.start()
end_idx = match.end()
string = string[:begin_idx] + exploded_str + string[end_idx:]
return string
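# Illustrative sketch (hypothetical trace fields): the exploder also works
# when the array sits between scalar fields, e.g.
#
#     trace_parser_explode_array("cpu=1 load={1 2} util=3", {"load": 3})
#     # -> "cpu=1 load0=1 load1=2 load2=0 util=3"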
class Base(object):
"""Base class to parse trace.dat dumps.
    Don't use directly; create a subclass that has a unique_word class
    variable.  unique_word is a string that can uniquely identify
    lines in the trace that correspond to this event.  This is usually
    the trace event name (optionally followed by a colon,
    e.g. "sched_switch:"), but it can be anything else for tracepoints
    generated using trace_printk().
:param parse_raw: If :code:`True`, raw trace data (-r option) to
trace-cmd will be used
    :param fallback: If :code:`True`, the parsing class will be used
        only if no other candidate class's unique_word matched.  Subclasses
        should override this (e.g. TracingMarkWrite uses it)
This class acts as a base class for all TRAPpy events
"""
def __init__(self, parse_raw=False, fallback=False):
self.fallback = fallback
self.tracer = None
self.data_frame = pd.DataFrame()
self.line_array = []
self.data_array = []
self.time_array = []
self.comm_array = []
self.pid_array = []
self.tgid_array = []
self.cpu_array = []
self.parse_raw = parse_raw
self.cached = False
def finalize_object(self):
pass
def __get_trace_array_lengths(self):
"""Calculate the lengths of all arrays in the trace
        Returns a dict with the name of each array found in the trace
        as keys and their corresponding lengths as values
"""
from collections import defaultdict
pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")
ret = defaultdict(int)
for line in self.data_array:
while True:
match = re.search(pat_array, line)
if not match:
break
(array_name, array_elements) = match.groups()
array_len = len(array_elements.split(' '))
if array_len > ret[array_name]:
ret[array_name] = array_len
line = line[match.end():]
# Stop scanning if the trace doesn't have arrays
if len(ret) == 0:
break
return ret
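    # Illustrative sketch (hypothetical data): if self.data_array contains
    # "load={1 2}" and "load={3 4 5}", this method returns {"load": 3},
    # i.e. the longest occurrence of each array wins.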
def append_data(self, time, comm, pid, tgid, cpu, line, data):
"""Append data parsed from a line to the corresponding arrays
The :mod:`DataFrame` will be created from this when the whole trace
has been parsed.
:param time: The time for the line that was printed in the trace
:type time: float
:param comm: The command name or the execname from which the trace
line originated
:type comm: str
        :param pid: The PID of the process from which the trace
            line originated
        :type pid: int
        :param tgid: The TGID of the process from which the trace
            line originated
        :type tgid: int
        :param cpu: The CPU on which the trace line was emitted
        :type cpu: int
        :param line: The line number of the matching line in the trace
        :type line: int
        :param data: The data for the matching line in the trace
        :type data: str
        """
self.time_array.append(time)
self.comm_array.append(comm)
self.pid_array.append(pid)
self.tgid_array.append(tgid)
self.cpu_array.append(cpu)
self.line_array.append(line)
self.data_array.append(data)
def string_cast(self, string, type):
""" Attempt to convert string to another type
        Here we attempt to cast string to a type.  Currently only
        integer conversion is supported, with future expansion
        left open to other types.
:param string: The value to convert.
:type string: str
:param type: The type to convert to.
:type type: type
"""
        # Currently this function only supports int conversion; any other
        # type is returned unchanged rather than silently dropped
        if type != int:
            return string
        # Handle false positives for negative numbers
        if not string.lstrip("-").isdigit():
            return string
        return int(string)
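    # Illustrative sketch (hypothetical values):
    #
    #     self.string_cast("-42", int)   # -> -42
    #     self.string_cast("3.5", int)   # -> "3.5" (not a digit string)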
    def generate_data_dict(self, data_str):
        """Parse a whitespace-separated "key=value" payload into a dict"""
        data_dict = {}
        prev_key = None
        for field in data_str.split():
            if "=" not in field:
                # A field without "=" continues the previous field's value;
                # concatenation is supported only for "string" values
                if prev_key is None or type(data_dict[prev_key]) is not str:
                    continue
                data_dict[prev_key] += ' ' + field
                continue
(key, value) = field.split('=', 1)
value = self.string_cast(value, int)
data_dict[key] = value
prev_key = key
return data_dict
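    # Illustrative sketch (hypothetical trace payload): fields without "="
    # are folded into the previous string value, so
    #
    #     self.generate_data_dict("load=10 comm=foo bar")
    #     # -> {"load": 10, "comm": "foo bar"}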
    def generate_parsed_data(self):
        """Yield a dict of parsed data for each line stored in the arrays"""
# Get a rough idea of how much memory we have to play with
CHECK_MEM_COUNT = 10000
kb_free = _get_free_memory_kb()
starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss
check_memory_usage = True
check_memory_count = 1
for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
self.tgid_array, self.cpu_array,
self.line_array, self.data_array):
data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
data_dict.update(self.generate_data_dict(data_str))
            # When running out of memory, Pandas has been observed to segfault
            # rather than throwing a proper Python error.
            # Look at how much memory our process is using and warn if we seem
            # to be getting close to the system's limit.  Check once at the
            # beginning and then every CHECK_MEM_COUNT events thereafter.
check_memory_count -= 1
if check_memory_usage and check_memory_count == 0:
kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
if kb_free and kb_used > kb_free * 0.9:
warnings.warn("TRAPpy: Appear to be low on memory. "
"If errors arise, try providing more RAM")
check_memory_usage = False
check_memory_count = CHECK_MEM_COUNT
yield data_dict
def create_dataframe(self):
"""Create the final :mod:`pandas.DataFrame`"""
if not self.time_array:
return
trace_arr_lengths = self.__get_trace_array_lengths()
        if trace_arr_lengths:
for (idx, val) in enumerate(self.data_array):
expl_val = trace_parser_explode_array(val, trace_arr_lengths)
self.data_array[idx] = expl_val
time_idx = pd.Index(self.time_array, name="Time")
self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)
        self.time_array = []
        self.line_array = []
        self.comm_array = []
        self.pid_array = []
        self.tgid_array = []
        self.cpu_array = []
        self.data_array = []
def write_csv(self, fname):
"""Write the csv info into a CSV file
:param fname: The name of the CSV file
:type fname: str
"""
self.data_frame.to_csv(fname)
def read_csv(self, fname):
"""Read the csv data into a DataFrame
:param fname: The name of the CSV file
:type fname: str
"""
        self.data_frame = pd.read_csv(fname, index_col=0)
def normalize_time(self, basetime):
"""Substract basetime from the Time of the data frame
:param basetime: The offset which needs to be subtracted from
the time index
:type basetime: float
"""
if basetime and not self.data_frame.empty:
self.data_frame.reset_index(inplace=True)
self.data_frame["Time"] = self.data_frame["Time"] - basetime
self.data_frame.set_index("Time", inplace=True)
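
# A minimal end-to-end sketch of the subclass contract (hypothetical event
# name and payloads; real subclasses are normally generated by the trace
# parser from each event's unique_word):
if __name__ == "__main__":
    class _ExampleEvent(Base):
        unique_word = "sched_example:"

    event = _ExampleEvent()
    # One call per matching trace line: timestamp, task, pid/tgid, cpu,
    # line number within the trace, and the raw "key=value" payload
    event.append_data(time=0.000001, comm="bash", pid=42, tgid=42, cpu=0,
                      line=1, data="load=10 util=5")
    event.append_data(time=0.000002, comm="bash", pid=42, tgid=42, cpu=0,
                      line=2, data="load=12 util=6")
    event.create_dataframe()
    event.normalize_time(0.000001)  # Time index now starts at 0
    print(event.data_frame)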