# Copyright 2015-2016 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Scheduler specific Functionality for the
stats framework
The Scheduler stats aggregation is based on a signal
which is generated by the combination of two triggers
from the events with the following parameters
========================= ============ =============
EVENT VALUE FILTERS
========================= ============ =============
:func:`sched_switch` 1 next_pid
:func:`sched_switch` -1 prev_pid
========================= ============ =============
Both these Triggers are provided by the event
:mod:`trappy.sched.SchedSwitch` which correspond to
the :code:`sched_switch` unique word in the trace
.. seealso:: :mod:`trappy.stats.Trigger.Trigger`
Using the above information the following signals are
generated.
**EVENT SERIES**
This is a combination of the two triggers as specified
above and has alternating +/- 1 values and is merely
a representation of the position in time when the process
started or stopped running on a CPU
**RESIDENCY SERIES**
This series is a cumulative sum of the event series and
is a representation of the continuous residency of the
process on a CPU
The pivot for the aggregators is the CPU on which the
event occurred on. If N is the number of CPUs in the
system, N signal for each CPU are generated. These signals
can then be aggregated by specifying a Topology
.. seealso:: :mod:`trappy.stats.Topology.Topology`
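
A minimal usage sketch, using only functions from this module and
:mod:`trappy` (the :code:`"trace.dat"` path and the :code:`"my_task"`
name are placeholders)::

    import trappy
    from bart.sched import functions as sched_funcs

    # "trace.dat" and "my_task" are placeholder values
    ftrace = trappy.FTrace("trace.dat")
    pids = sched_funcs.get_pids_for_process(ftrace, "my_task")
    triggers = sched_funcs.sched_triggers(ftrace, pids[0],
                                          trappy.sched.SchedSwitch)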
"""
import numpy as np
from trappy.stats.Trigger import Trigger
WINDOW_SIZE = 0.0001
"""A control config for filter events. Some analyses
may require ignoring of small interruptions"""
# Trigger Values
SCHED_SWITCH_IN = 1
"""Value of the event when a task is **switch in**
or scheduled on a CPU"""
SCHED_SWITCH_OUT = -1
"""Value of the event when a task is **switched out**
or relinquishes a CPU"""
NO_EVENT = 0
"""Signifies no event on an event trace"""
# Field Names
CPU_FIELD = "__cpu"
"""The column in the sched_switch event that
indicates the CPU on which the event occurred
"""
NEXT_PID_FIELD = "next_pid"
"""The column in the sched_switch event that
indicates the PID of the next process to be scheduled
"""
PREV_PID_FIELD = "prev_pid"
"""The column in the sched_switch event that
indicates the PID of the process that was scheduled
in
"""
TASK_RUNNING = 1
"""The column in the sched_switch event that
indicates the CPU on which the event occurred
"""
TASK_NOT_RUNNING = 0
"""In a residency series, a zero indicates
that the task is not running
"""
TIME_INVAL = -1
"""Standard Value to indicate invalid time data"""
SERIES_SANTIZED = "_sched_sanitized"
"""A memoized flag which is set when an event series
is checked for boundary conditions
"""
def sanitize_asymmetry(series, window=None):
"""Sanitize the cases when a :code:`SWITCH_OUT`
happens before a :code:`SWITCH_IN`. (The case when
a process is already running before the trace started)
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
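
    An illustrative call (synthetic values): the padding sample at the
    window start is turned into a switch in, so that the leading
    switch out is balanced::

        import pandas as pd

        # trace starts while the task is already on the CPU
        series = pd.Series([0, -1, 1, -1], index=[0.0, 0.1, 0.2, 0.3])
        series = sanitize_asymmetry(series, window=(0.0, 0.3))
        # series values are now [1, -1, 1, -1]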
"""
if not hasattr(series, SERIES_SANTIZED):
events = series[series != 0]
if len(series) >= 2 and len(events):
if series.values[0] == SCHED_SWITCH_OUT:
series.values[0] = TASK_NOT_RUNNING
elif events.values[0] == SCHED_SWITCH_OUT:
series.values[0] = SCHED_SWITCH_IN
if window:
series.index.values[0] = window[0]
if series.values[-1] == SCHED_SWITCH_IN:
series.values[-1] = TASK_NOT_RUNNING
elif events.values[-1] == SCHED_SWITCH_IN:
series.values[-1] = SCHED_SWITCH_OUT
if window:
series.index.values[-1] = window[1]
# No point if the series just has one value and
# one event. We do not have sufficient data points
# for any calculation. We should Ideally never reach
# here.
elif len(series) == 1:
series.values[0] = 0
setattr(series, SERIES_SANTIZED, True)
return series
def csum(series, window=None, filter_gaps=False):
""":func:`aggfunc` for the cumulative sum of the
input series data
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
    :param filter_gaps: If set, a process being switched out
        for less than :mod:`bart.sched.functions.WINDOW_SIZE` is
        ignored. This is helpful when small interruptions need
        to be ignored to compare overall correlation
:type filter_gaps: bool
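
    An illustrative example (synthetic values): the switch out at 0.2
    followed by a switch in only 50 microseconds later is dropped
    before the cumulative sum is taken::

        import pandas as pd

        series = pd.Series([1, -1, 1, -1],
                           index=[0.1, 0.2, 0.20005, 0.4])
        csum(series, filter_gaps=True)
        # the task appears continuously running from 0.1 to 0.4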
"""
if filter_gaps:
series = filter_small_gaps(series)
series = series.cumsum()
return select_window(series, window)
def filter_small_gaps(series):
"""A helper function that does filtering of gaps
in residency series < :mod:`bart.sched.functions.WINDOW_SIZE`
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
"""
start = None
for index, value in series.iteritems():
if value == SCHED_SWITCH_IN:
            if start is None:
continue
if index - start < WINDOW_SIZE:
series[start] = NO_EVENT
series[index] = NO_EVENT
start = None
if value == SCHED_SWITCH_OUT:
start = index
return series
def first_cpu(series, window=None):
""":func:`aggfunc` to calculate the time of
the first switch in event in the series
This is returned as a vector of unit length
so that it can be aggregated and reduced across
nodes to find the first cpu of a task
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
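
    An illustrative call (synthetic values)::

        import pandas as pd

        series = pd.Series([1, -1, 1, -1], index=[0.1, 0.2, 0.3, 0.4])
        first_cpu(series)   # returns [0.1]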
"""
series = select_window(series, window)
series = series[series == SCHED_SWITCH_IN]
if len(series):
return [series.index.values[0]]
else:
return [float("inf")]
def last_cpu(series, window=None):
""":func:`aggfunc` to calculate the time of
the last switch out event in the series
This is returned as a vector of unit length
so that it can be aggregated and reduced across
nodes to find the last cpu of a task
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
"""
series = select_window(series, window)
series = series[series == SCHED_SWITCH_OUT]
if len(series):
return [series.index.values[-1]]
else:
return [0]
def select_window(series, window):
"""Helper Function to select a portion of
pandas time series
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
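
    For example (synthetic values), a window of ``(0.15, 0.35)``
    keeps only the samples at 0.2 and 0.3::

        import pandas as pd

        series = pd.Series([1, -1, 1, -1], index=[0.1, 0.2, 0.3, 0.4])
        select_window(series, (0.15, 0.35))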
"""
if not window:
return series
start, stop = window
ix = series.index
selector = ((ix >= start) & (ix <= stop))
window_series = series[selector]
return window_series
def residency_sum(series, window=None):
""":func:`aggfunc` to calculate the total
residency
    The input series is processed for
    intervals between a :mod:`bart.sched.functions.SCHED_SWITCH_IN`
    and a :mod:`bart.sched.functions.SCHED_SWITCH_OUT` to track
    the additive residency of a task

    .. math::

        S_{in} = i_{1}, i_{2}...i_{N} \\\\
        S_{out} = o_{1}, o_{2}...o_{N} \\\\
        R_{total} = \sum_{k}^{N}\Delta_k = \sum_{k}^{N}(o_{k} - i_{k})

:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
:return: A scalar float value
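
    For example (synthetic values), a task switched in at 0.1 and 0.3
    and switched out at 0.2 and 0.5 has a total residency of
    (0.2 - 0.1) + (0.5 - 0.3) = 0.3::

        import pandas as pd

        series = pd.Series([1, -1, 1, -1], index=[0.1, 0.2, 0.3, 0.5])
        residency_sum(series)   # approximately 0.3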
"""
if not len(series):
return 0.0
org_series = series
series = select_window(series, window)
series = sanitize_asymmetry(series, window)
s_in = series[series == SCHED_SWITCH_IN]
s_out = series[series == SCHED_SWITCH_OUT]
if not (len(s_in) and len(s_out)):
try:
org_series = sanitize_asymmetry(org_series)
running = select_window(org_series.cumsum(), window)
if running.values[0] == TASK_RUNNING and running.values[-1] == TASK_RUNNING:
return window[1] - window[0]
        except Exception:
            pass
if len(s_in) != len(s_out):
raise RuntimeError(
"Unexpected Lengths: s_in={}, s_out={}".format(
len(s_in),
len(s_out)))
else:
return np.sum(s_out.index.values - s_in.index.values)
def first_time(series, value, window=None):
""":func:`aggfunc` to:
- Return the first index where the
series == value
- If no such index is found
+inf is returned
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
:return: A vector of Unit Length
"""
series = select_window(series, window)
series = series[series == value]
if not len(series):
return [float("inf")]
return [series.index.values[0]]
def period(series, align="start", window=None):
"""This :func:`aggfunc` returns a tuple
of the average duration between two triggers:
- When :code:`align=start` the :code:`SCHED_IN`
trigger is used
- When :code:`align=end` the :code:`SCHED_OUT`
trigger is used
.. math::
E = e_{1}, e_{2}...e_{N} \\\\
T_p = \\frac{\sum_{j}^{\lfloor N/2 \\rfloor}(e_{2j + 1} - e_{2j})}{N}
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
:return:
A list of deltas of successive starts/stops
of a task
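
    For example (synthetic values), with switch ins at 0.1, 0.3 and
    0.5, the deltas between successive switch ins are returned::

        import pandas as pd

        series = pd.Series([1, -1, 1, -1, 1, -1],
                           index=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6])
        period(series)   # [0.2, 0.2] (approximately)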
"""
series = select_window(series, window)
series = sanitize_asymmetry(series, window)
if align == "start":
series = series[series == SCHED_SWITCH_IN]
elif align == "end":
series = series[series == SCHED_SWITCH_OUT]
if len(series) % 2 == 0:
series = series[:1]
if not len(series):
return []
return list(np.diff(series.index.values))
def last_time(series, value, window=None):
""":func:`aggfunc` to:
- The first index where the
series == value
- If no such index is found
:mod:`bart.sched.functions.TIME_INVAL`
is returned
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
:return: A vector of Unit Length
"""
series = select_window(series, window)
series = series[series == value]
if not len(series):
return [TIME_INVAL]
return [series.index.values[-1]]
def binary_correlate(series_x, series_y):
"""Helper function to Correlate binary Data
Both the series should have same indices
For binary time series data:
.. math::
\\alpha_{corr} = \\frac{N_{agree} - N_{disagree}}{N}
:param series_x: First time Series data
:type series_x: :mod:`pandas.Series`
:param series_y: Second time Series data
:type series_y: :mod:`pandas.Series`
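
    For example (synthetic values), two series that agree on two out
    of four samples have a correlation of (2 - 2) / 4 = 0.0::

        import pandas as pd

        x = pd.Series([1, 0, 1, 1])
        y = pd.Series([1, 1, 1, 0])
        binary_correlate(x, y)   # 0.0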
"""
if len(series_x) != len(series_y):
        raise ValueError(
            "Cannot compute binary correlation for unequal vectors")
agree = len(series_x[series_x == series_y])
disagree = len(series_x[series_x != series_y])
return (agree - disagree) / float(len(series_x))
def get_pids_for_process(ftrace, execname, cls=None):
"""Get the PIDs for a given process
:param ftrace: A ftrace object with a sched_switch
event
:type ftrace: :mod:`trappy.ftrace.FTrace`
:param execname: The name of the process
:type execname: str
:param cls: The SchedSwitch event class (required if
a different event is to be used)
:type cls: :mod:`trappy.base.Base`
:return: The set of PIDs for the execname
"""
if not cls:
try:
df = ftrace.sched_switch.data_frame
except AttributeError:
raise ValueError("SchedSwitch event not found in ftrace")
if len(df) == 0:
raise ValueError("SchedSwitch event not found in ftrace")
else:
event = getattr(ftrace, cls.name)
df = event.data_frame
mask = df["next_comm"].apply(lambda x : True if x == execname else False)
return list(np.unique(df[mask]["next_pid"].values))
def get_task_name(ftrace, pid, cls=None):
"""Returns the execname for pid
:param ftrace: A ftrace object with a sched_switch
event
:type ftrace: :mod:`trappy.ftrace.FTrace`
:param pid: The PID of the process
:type pid: int
:param cls: The SchedSwitch event class (required if
a different event is to be used)
:type cls: :mod:`trappy.base.Base`
:return: The execname for the PID
"""
if not cls:
try:
df = ftrace.sched_switch.data_frame
except AttributeError:
raise ValueError("SchedSwitch event not found in ftrace")
else:
event = getattr(ftrace, cls.name)
df = event.data_frame
df = df[df["next_pid"] == pid]
if not len(df):
return ""
else:
return df["next_comm"].values[0]
def sched_triggers(ftrace, pid, sched_switch_class):
"""Returns the list of sched_switch triggers
:param ftrace: A ftrace object with a sched_switch
event
:type ftrace: :mod:`trappy.ftrace.FTrace`
:param pid: The PID of the associated process
:type pid: int
:param sched_switch_class: The SchedSwitch event class
:type sched_switch_class: :mod:`trappy.base.Base`
:return: List of triggers, such that
::
triggers[0] = switch_in_trigger
triggers[1] = switch_out_trigger
"""
if not hasattr(ftrace, "sched_switch"):
raise ValueError("SchedSwitch event not found in ftrace")
triggers = []
triggers.append(sched_switch_in_trigger(ftrace, pid, sched_switch_class))
triggers.append(sched_switch_out_trigger(ftrace, pid, sched_switch_class))
return triggers
def sched_switch_in_trigger(ftrace, pid, sched_switch_class):
"""
:param ftrace: A ftrace object with a sched_switch
event
:type ftrace: :mod:`trappy.ftrace.FTrace`
:param pid: The PID of the associated process
:type pid: int
:param sched_switch_class: The SchedSwitch event class
:type sched_switch_class: :mod:`trappy.base.Base`
:return: :mod:`trappy.stats.Trigger.Trigger` on
the SchedSwitch: IN for the given PID
"""
task_in = {}
task_in[NEXT_PID_FIELD] = pid
return Trigger(ftrace,
sched_switch_class, # trappy Event Class
task_in, # Filter Dictionary
SCHED_SWITCH_IN, # Trigger Value
CPU_FIELD) # Primary Pivot
def sched_switch_out_trigger(ftrace, pid, sched_switch_class):
"""
:param ftrace: A ftrace object with a sched_switch
event
:type ftrace: :mod:`trappy.ftrace.FTrace`
:param pid: The PID of the associated process
:type pid: int
:param sched_switch_class: The SchedSwitch event class
:type sched_switch_class: :mod:`trappy.base.Base`
:return: :mod:`trappy.stats.Trigger.Trigger` on
the SchedSwitch: OUT for the given PID
"""
task_out = {}
task_out[PREV_PID_FIELD] = pid
return Trigger(ftrace,
sched_switch_class, # trappy Event Class
task_out, # Filter Dictionary
SCHED_SWITCH_OUT, # Trigger Value
CPU_FIELD) # Primary Pivot
def trace_event(series, window=None):
"""
:func:`aggfunc` to be used for plotting
the process residency data using
:mod:`trappy.plotter.EventPlot`
:param series: Input Time Series data
:type series: :mod:`pandas.Series`
:param window: A tuple indicating a time window
:type window: tuple
:return: A list of events
of the type:
::
[
[start_time_1, stop_time_1],
[start_time_2, stop_time_2],
#
#
[start_time_N, stop_time_N],
]
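
    For example (synthetic values), two residency intervals are
    returned as start/stop pairs::

        import pandas as pd

        series = pd.Series([1, -1, 1, -1], index=[0.1, 0.2, 0.3, 0.4])
        trace_event(series)   # [[0.1, 0.2], [0.3, 0.4]]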
"""
rects = []
series = select_window(series, window)
series = sanitize_asymmetry(series, window)
s_in = series[series == SCHED_SWITCH_IN]
s_out = series[series == SCHED_SWITCH_OUT]
if not len(s_in):
return rects
if len(s_in) != len(s_out):
raise RuntimeError(
"Unexpected Lengths: s_in={}, s_out={}".format(
len(s_in),
len(s_out)))
return np.column_stack((s_in.index.values, s_out.index.values))