| # SPDX-License-Identifier: Apache-2.0 |
| # |
| # Copyright (C) 2016, ARM Limited and contributors. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| # not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| |
| from collections import namedtuple, OrderedDict |
| from itertools import product |
| import logging |
| import operator |
| import re |
| |
| import pandas as pd |
| import numpy as np |
| |
| from devlib.utils.misc import memoized, mask_to_list |
| from devlib import TargetError |
| |
| """Classes for modeling and estimating energy usage of CPU systems""" |
| |
| def read_multiple_oneline_files(target, glob_patterns): |
| """ |
| Quickly read many single-line files that match a glob pattern |
| |
| Finds all the files that match any of the glob patterns and, assuming that |
| they each contain exactly 1 line of text, read them all at once. When the |
| target or connection is slow this saves a lot of time when reading a large |
| number of files. |
| |
| This will only work safely on stationary files, don't try to use it where |
| the glob expansion will change often - for example /proc/**/autogroup would |
| not work because /proc/ entries will likely appear & disappear while we're |
| reading them. |
| |
| :param target: devlib target object to read from |
| :param glob_pattern: Unix glob pattern matching the files to read |
| :returns: A dictionary mapping matched paths to the values read. ``{}`` if |
| no paths matched the globs. |
| """ |
| find_cmd = 'find ' + ' '.join(glob_patterns) |
| try: |
| paths = target.execute(find_cmd).split() |
| except TargetError: |
| return {} |
| |
| cmd = '{} | {} xargs cat'.format(find_cmd, target.busybox) |
| contents = target.execute(cmd).splitlines() |
| |
| if len(contents) != len(paths): |
| raise RuntimeError('File count mismatch while reading multiple files') |
| |
| return dict(zip(paths, contents)) |
| |
| class EnergyModelCapacityError(Exception): |
| """Used by :meth:`EnergyModel.get_optimal_placements`""" |
| pass |
| |
| class ActiveState(namedtuple('ActiveState', ['capacity', 'power'])): |
| """Represents power and compute capacity at a given frequency |
| |
| :param capacity: Relative compute capacity at frequency |
| :param power: Power usage at frequency |
| """ |
| def __new__(cls, capacity=None, power=None): |
| return super(ActiveState, cls).__new__(cls, capacity, power) |
| |
| class _CpuTree(object): |
| """Internal class. Abstract representation of a CPU topology. |
| |
| Each node contains either a single CPU or a set of child nodes. |
| """ |
| def __init__(self, cpu, children): |
| if (cpu is None) == (children is None): |
| raise ValueError('Provide exactly one of: cpu or children') |
| |
| self.parent = None |
| self.cpu = cpu |
| |
| if cpu is not None: |
| self.cpus = (cpu,) |
| self.children = [] |
| else: |
| if len(children) == 0: |
| raise ValueError('children cannot be empty') |
| self.cpus = tuple(sorted(set().union(*[n.cpus for n in children]))) |
| self.children = children |
| for child in children: |
| child.parent = self |
| |
| self.name = None |
| |
| def __repr__(self): |
| name_bit = '' |
| if self.name: |
| name_bit = 'name="{}", '.format(self.name) |
| |
| if self.children: |
| return '{}({}children={})'.format( |
| self.__class__.__name__, name_bit, self.children) |
| else: |
| return '{}({}cpus={})'.format( |
| self.__class__.__name__, name_bit, self.cpus) |
| |
| def _iter(self, include_non_leaves): |
| for child in self.children: |
| for child_i in child._iter(include_non_leaves): |
| yield child_i |
| if include_non_leaves or not self.children: |
| yield self |
| |
| def iter_nodes(self): |
| """Iterate over nodes depth-first, post-order""" |
| return self._iter(True) |
| |
| def iter_leaves(self): |
| """Iterate over leaves""" |
| return self._iter(False) |
| |
| class EnergyModelNode(_CpuTree): |
| """Describes topology and energy data for an EnergyModel. |
| |
| Represents a CPU topology with energy data. The active and idle state data |
| represents the power usage of just the hardware resources of this topology |
| level, not its children. e.g. If the node represents a cluster, the power |
| numbers should not include power used by the CPU - that power should be |
| included the data of the child nodes. |
| |
| Exactly one of ``cpu`` and ``children`` must be given. |
| |
| :param active_states: Dict mapping frequencies to :class:`ActiveState` |
| values. Compute capacity data is optional for |
| non-leaf nodes. |
| :param idle_states: Dict mapping idle state names to power usage values |
| :param cpu: The CPU this node represents. If provided, this is a leaf node. |
| :type cpus: tuple(int) |
| :param children: Non-empty list of child :class:`EnergyModelNode` objects |
| :param name: Optional human-readable name for this node. Leaf (CPU) nodes |
| have a default name of "cpuN" where N is the cpu number. |
| |
| :ivar cpus: CPUs contained in this node. Includes those of child nodes. |
| :ivar cpu: For convenience, this holds the single CPU contained by leaf |
| nodes. ``None`` for non-leaf nodes. |
| """ |
| def __init__(self, active_states, idle_states, |
| cpu=None, children=None, name=None): |
| super(EnergyModelNode, self).__init__(cpu, children) |
| |
| self._log = logging.getLogger('EnergyModel') |
| |
| def is_monotonic(l, decreasing=False): |
| op = operator.ge if decreasing else operator.le |
| return all(op(a, b) for a, b in zip(l, l[1:])) |
| |
| if active_states: |
| # Sanity check for active_states's frequencies |
| freqs = active_states.keys() |
| if not is_monotonic(freqs): |
| self._log.warning( |
| 'Active states frequencies are expected to be ' |
| 'monotonically increasing. Freqs: {}'.format(freqs)) |
| |
| # Sanity check for active_states's powers |
| power_vals = [s.power for s in active_states.values()] |
| if not is_monotonic(power_vals): |
| self._log.warning( |
| 'Active states powers are expected to be ' |
| 'monotonically increasing. Values: {}'.format(power_vals)) |
| |
| # Sanity check for idle_states powers |
| if idle_states: |
| power_vals = idle_states.values() |
| if not is_monotonic(power_vals, decreasing=True): |
| self._log.warning( |
| 'Idle states powers are expected to be ' |
| 'monotonically decreasing. Values: {}'.format(power_vals)) |
| |
| if cpu is not None and not name: |
| name = 'cpu' + str(cpu) |
| |
| self.name = name |
| self.active_states = active_states |
| self.idle_states = idle_states |
| |
| @property |
| def max_capacity(self): |
| """Compute capacity at highest frequency""" |
| return max(s.capacity for s in self.active_states.values()) |
| |
| class EnergyModelRoot(EnergyModelNode): |
| """ |
| Convenience class for root of an EnergyModelNode tree. |
| |
| Just like EnergyModelNode except that ``active_states`` and ``idle_states`` |
| aren't required. |
| """ |
| def __init__(self, active_states=None, idle_states=None, |
| cpu=None, children=None, name=None): |
| return super(EnergyModelRoot, self).__init__( |
| active_states, idle_states, cpu, children, name) |
| |
| class PowerDomain(_CpuTree): |
| """Describes the power domain hierarchy for an EnergyModel. |
| |
| Power domains are a description of the topological dependencies in hardware |
| for entering idle states. "Composite" states such as cluster-sleep states |
| require a set of CPUs to all be idle before that state can be entered. In |
| that case those CPUs can be grouped into a power domain, and that composite |
| state attached to the power domain. Note that cpuidle is not aware of these |
| dependencies; they are typically handled by the platform firmware. |
| |
| Exactly one of ``cpu`` and ``children`` must be given. That is, leaves of |
| the PowerDomain tree always contain exactly one CPU - each CPU is |
| represented as being in a power domain of its own. This represents the |
| assumption that all CPUs have at least one idle state (such as ARM WFI) that |
| they can enter independently of other CPUs. |
| |
| :param idle_states: List of names of idle states for this power domain. Does |
| not store power data - these names are used as keys into |
| the ``idle_states`` field of :class:`EnergyModelNode` |
| objects. |
| :type idle_states: list(str) |
| :param cpu: The CPU this node represents. If provided, this is a leaf node. |
| :type cpu: int |
| :param children: Non-empty list of child :class:`PowerDomain` objects |
| :type children: list(PowerDomain) |
| |
| :ivar cpus: CPUs contained in this node. Includes those of child nodes. |
| :type cpus: tuple(int) |
| """ |
| def __init__(self, idle_states, cpu=None, children=None): |
| if idle_states is None: |
| raise ValueError('idle_states cannot be None (but may be empty)') |
| super(PowerDomain, self).__init__(cpu, children) |
| self.idle_states = idle_states |
| |
| class EnergyModel(object): |
| """Represents hierarchical CPU topology with power and capacity data |
| |
| An energy model consists of |
| |
| - A CPU topology, representing the physical (cache/interconnect) topology of |
| the CPUs. Each node stores the energy usage of that node's hardware when |
| it is in each active or idle state. They also store a compute capacity at |
| each frequency, but this is only meaningful for leaf nodes (CPUs) and may |
| be None at higher levels. These capacity values are relative; the maximum |
| capacity would usually be 1024, the value of SCHED_CAPACITY_SCALE in the |
| Linux kernel scheduler. Use EnergyModelNodes to describe this. |
| |
| - A power domain topology, representing the hierarchy of areas that can be |
| powered down (idled). |
| The power domains are a single tree. Leaf nodes must contain exactly one |
| CPU and the root node must indirectly contain every CPU. Each power domain |
| has a list (maybe empty) of names of idle states that that domain can |
| enter. |
| Use PowerDomains to describe this. |
| |
| - A set of frequency domains, representing groups of CPUs whose clock |
| frequencies must be equal (probably because they share a clock). The |
| frequency domains must be a partition of the CPUs. |
| |
| :ivar cpu_nodes: List of leaf (CPU) :class:`EnergyModelNode` |
| :ivar cpus: List of logical CPU numbers in the system |
| |
| :param root_node: Root of :class:`EnergyModelNode` tree |
| :param root_power_domain: Root of :class:`PowerDomain` tree |
| :param freq_domains: Collection of collections of logical CPU numbers |
| representing frequency (clock) domains. |
| |
| .. note:: |
| The most signficant shortcomings of the model are: |
| |
| 1. Voltage domains are assumed to be congruent to frequency domains |
| |
| 2. Idle state power is assumed to be independent of voltage |
| |
| 3. Temperature is ignored entirely |
| |
| .. _cpu-utils: |
| |
| .. admonition:: ``cpu_utils``: CPU util distributions |
| |
| Used throughout this module: A ``cpu_utils`` is a list ``u`` where |
| ``u[N]`` is the sum of the frequency-invariant, capacity-invariant |
| utilization of tasks placed on CPU N. That is, the quantity represented |
| by a CPU runqueue's util_avg in the Linux kernel scheduler's |
| load-tracking system with EAS features enabled. |
| |
| The range of utilization values is 0 - |
| :attr:`EnergyModel.capacity_scale`. |
| |
| This represents a static utilization, assuming that tasks don't change |
| in size (for example representing a set of fixed periodic RT-App |
| workloads). For workloads that change over time, a series of |
| ``cpu_utils`` items would be needed to describe the utilization, with a |
| distinct estimation for each item in the series. |
| """ |
| |
| capacity_scale = 1024 |
| """The relative computational capacity of the most powerful CPU at its |
| highest available frequency. |
| """ |
| |
| def __init__(self, root_node, root_power_domain, freq_domains): |
| self.cpus = root_node.cpus |
| if self.cpus != tuple(range(len(self.cpus))): |
| raise ValueError('CPU IDs [{}] are sparse'.format(self.cpus)) |
| |
| # Check that freq_domains is a partition of the CPUs |
| fd_intersection = set().intersection(*freq_domains) |
| if fd_intersection: |
| raise ValueError('CPUs {} exist in multiple freq domains'.format( |
| fd_intersection)) |
| fd_difference = set(self.cpus) - set().union(*freq_domains) |
| if fd_difference: |
| raise ValueError('CPUs {} not in any frequency domain'.format( |
| fd_difference)) |
| self.freq_domains = freq_domains |
| |
| # Check that nodes with energy data are all within a frequency domain |
| for node in root_node.iter_nodes(): |
| if not node.active_states or node.idle_states: |
| continue |
| cpu_freq_doms = [] |
| for cpu in node.cpus: |
| [cpu_freq_dom] = [d for d in freq_domains if cpu in d] |
| cpu_freq_doms.append(cpu_freq_dom) |
| if not all(d == cpu_freq_doms[0] for d in cpu_freq_doms[1:]): |
| raise ValueError( |
| 'Node {} (CPUs {}) ' |
| 'has energy data and overlaps freq domains'.format( |
| node.name, node.cpus)) |
| |
| def sorted_leaves(root): |
| # Get a list of the leaf (cpu) nodes of a _CpuTree in order of the |
| # CPU ID |
| ret = sorted(list(root.iter_leaves()), key=lambda n: n.cpus[0]) |
| assert all(len(n.cpus) == 1 for n in ret) |
| return ret |
| |
| self.root = root_node |
| self.cpu_nodes = sorted_leaves(root_node) |
| self.cpu_pds = sorted_leaves(root_power_domain) |
| assert len(self.cpu_pds) == len(self.cpu_nodes) |
| |
| self._log = logging.getLogger('EnergyModel') |
| |
| max_cap = max(n.max_capacity for n in self.cpu_nodes) |
| if max_cap != self.capacity_scale: |
| self._log.warning( |
| 'Unusual max capacity (%s), overriding capacity_scale', max_cap) |
| self.capacity_scale = max_cap |
| |
| def _cpus_with_capacity(self, cap): |
| """ |
| Helper method to find the CPUs whose max capacity equals cap |
| """ |
| return [c for c in self.cpus |
| if self.cpu_nodes[c].max_capacity == cap] |
| |
| @property |
| @memoized |
| def biggest_cpus(self): |
| """ |
| The CPUs with the highest compute capacity at their highest frequency |
| """ |
| return self._cpus_with_capacity(self.capacity_scale) |
| |
| @property |
| @memoized |
| def littlest_cpus(self): |
| """ |
| The CPUs with the lowest compute capacity at their highest frequency |
| """ |
| min_cap = min(n.max_capacity for n in self.cpu_nodes) |
| return self._cpus_with_capacity(min_cap) |
| |
| @property |
| @memoized |
| def is_heterogeneous(self): |
| """ |
| True iff CPUs do not all have the same efficiency and OPP range |
| """ |
| states = self.cpu_nodes[0].active_states |
| return any(c.active_states != states for c in self.cpu_nodes[1:]) |
| |
| @property |
| @memoized |
| def cpu_groups(self): |
| """ |
| List of lists of CPUs who share the same active state values |
| """ |
| groups = [] |
| for node in self.cpu_nodes: |
| for group in groups: |
| group_states = self.cpu_nodes[group[0]].active_states |
| if node.active_states == group_states: |
| group.append(node.cpu) |
| break |
| else: |
| groups.append([node.cpu]) |
| return groups |
| |
| def _guess_idle_states(self, cpus_active): |
| def find_deepest(pd): |
| if not any(cpus_active[c] for c in pd.cpus): |
| if pd.parent: |
| parent_state = find_deepest(pd.parent) |
| if parent_state: |
| return parent_state |
| return pd.idle_states[-1] if len(pd.idle_states) else None |
| return None |
| |
| return [find_deepest(pd) for pd in self.cpu_pds] |
| |
| def get_cpu_capacity(self, cpu, freq=None): |
| """Convenience method to get the capacity of a CPU at a given frequency |
| |
| :param cpu: CPU to get capacity for |
| :param freq: Frequency to get the CPU capacity at. Default is max |
| capacity. |
| """ |
| if freq is None: |
| return self.cpu_nodes[cpu].max_capacity |
| return self.cpu_nodes[cpu].active_states[freq].capacity |
| |
| def guess_idle_states(self, cpus_active): |
| """Pessimistically guess the idle states that each CPU may enter |
| |
| If a CPU has any tasks it is estimated that it may only enter its |
| shallowest idle state in between task activations. If all the CPUs |
| within a power domain have no tasks, they will all be judged able to |
| enter that domain's deepest idle state. If any CPU in a domain has work, |
| no CPUs in that domain are assumed to enter any domain shared state. |
| |
| e.g. Consider a system with |
| |
| - two power domains PD0 and PD1 |
| |
| - 4 CPUs, with CPUs [0, 1] in PD0 and CPUs [2, 3] in PD1 |
| |
| - 4 idle states: "WFI", "cpu-sleep", "cluster-sleep-0" and |
| "cluster-sleep-1", where the "cluster-sleep-*" states domain states, |
| i.e. a CPU can only enter those states when both CPUs in the domain |
| are idle. |
| |
| Then here are some example inputs and outputs: |
| |
| :: |
| |
| # All CPUs idle: |
| [0, 0, 0, 0] -> ["cluster-sleep-1", "cluster-sleep-1", |
| "cluster-sleep-1", "cluster-sleep-1"] |
| |
| # All CPUs have work |
| [1, 1, 1, 1] -> ["WFI","WFI","WFI", "WFI"] |
| |
| # One power domain active, the other idle |
| [0, 0, 1, 1] -> ["cluster-sleep-1", "cluster-sleep-1", "WFI","WFI"] |
| |
| # One CPU active. |
| # Note that CPU 2 has no work but is assumed to never be able to enter |
| # any "cluster" state. |
| [0, 0, 0, 1] -> ["cluster-sleep-1", "cluster-sleep-1", |
| "cpu-sleep","WFI"] |
| |
| :param cpus_active: list where bool(cpus_active[N]) is False iff no |
| tasks will run on CPU N. |
| :returns: List ``ret`` where ``ret[N]`` is the name of the estimated |
| idle state that CPU N can enter during idle periods. |
| |
| """ |
| states = self._guess_idle_states(cpus_active) |
| return [s or c.idle_states.keys()[0] |
| for s, c in zip(states, self.cpu_nodes)] |
| |
| def _guess_freqs(self, cpu_utils): |
| overutilized = False |
| # Find what frequency each CPU would need if it was alone in its |
| # frequency domain |
| ideal_freqs = [0 for _ in self.cpus] |
| for node in self.cpu_nodes: |
| [cpu] = node.cpus |
| required_cap = cpu_utils[cpu] |
| |
| possible_freqs = [f for f, s in node.active_states.iteritems() |
| if s.capacity >= required_cap] |
| |
| if possible_freqs: |
| ideal_freqs[cpu] = min(possible_freqs) |
| else: |
| # CPU cannot provide required capacity, use max freq |
| ideal_freqs[cpu] = max(node.active_states.keys()) |
| overutilized = True |
| |
| # Rectify the frequencies among domains |
| freqs = [0 for _ in ideal_freqs] |
| for domain in self.freq_domains: |
| domain_freq = max(ideal_freqs[c] for c in domain) |
| for cpu in domain: |
| freqs[cpu] = domain_freq |
| |
| return freqs, overutilized |
| |
| def guess_freqs(self, cpu_utils): |
| """Work out CPU frequencies required to execute a workload |
| |
| Find the lowest possible frequency for each CPU that provides enough |
| capacity to satisfy the utilization, taking into account frequency |
| domains. |
| |
| :param cpu_utils: Utilization distribution, see |
| :ref:`cpu_utils <cpu-utils>` |
| :returns: List ``ret`` where ``ret[N]`` is the frequency that CPU N must |
| run at |
| """ |
| freqs, _ = self._guess_freqs(cpu_utils) |
| return freqs |
| |
| def _estimate_from_active_time(self, cpu_active_time, freqs, idle_states, |
| combine): |
| """Helper for estimate_from_cpu_util |
| |
| Like estimate_from_cpu_util but uses active time i.e. proportion of time |
| spent not-idle in the range 0.0 - 1.0. |
| |
| If combine=False, return idle and active power as separate components. |
| """ |
| power = 0 |
| ret = {} |
| |
| assert all(0.0 <= a <= 1.0 for a in cpu_active_time) |
| |
| for node in self.root.iter_nodes(): |
| # Some nodes might not have energy model data, they could just be |
| # used to group other nodes (likely the root node, for example). |
| if not node.active_states or not node.idle_states: |
| continue |
| |
| cpus = tuple(node.cpus) |
| # For now we assume topology nodes with energy models do not overlap |
| # with frequency domains |
| freq = freqs[cpus[0]] |
| assert all(freqs[c] == freq for c in cpus[1:]) |
| |
| # The active time of a node is estimated as the max of the active |
| # times of its children. |
| # This works great for the synthetic periodic workloads we use in |
| # LISA (where all threads wake up at the same time) but is probably |
| # no good for real workloads. |
| active_time = max(cpu_active_time[c] for c in cpus) |
| active_power = node.active_states[freq].power * active_time |
| |
| _idle_power = max(node.idle_states[idle_states[c]] for c in cpus) |
| idle_power = _idle_power * (1 - active_time) |
| |
| if combine: |
| ret[cpus] = active_power + idle_power |
| else: |
| ret[cpus] = {} |
| ret[cpus]["active"] = active_power |
| ret[cpus]["idle"] = idle_power |
| |
| return ret |
| |
| def estimate_from_cpu_util(self, cpu_utils, freqs=None, idle_states=None): |
| """ |
| Estimate the energy usage of the system under a utilization distribution |
| |
| Optionally also take freqs; a list of frequencies at which each CPU is |
| assumed to run, and idle_states, the idle states that each CPU can enter |
| between activations. If not provided, they will be estimated assuming an |
| ideal selection system (i.e. perfect cpufreq & cpuidle governors). |
| |
| :param cpu_utils: Utilization distribution, see |
| :ref:`cpu_utils <cpu-utils>` |
| :param freqs: List of CPU frequencies. Got from :meth:`guess_freqs` by |
| default. |
| :param idle_states: List of CPU frequencies. Got from |
| :meth:`guess_idle_states` by default. |
| |
| :returns: Dict with power in bogo-Watts (bW), with contributions from |
| each system component keyed with a tuple of the CPUs |
| comprising that component (i.e. :attr:EnergyModelNode.cpus) |
| |
| :: |
| |
| { |
| (0,) : 10, |
| (1,) : 10, |
| (0, 1) : 5, |
| } |
| |
| This represents CPUs 0 and 1 each using 10bW and their shared |
| resources using 5bW for a total of 25bW. |
| """ |
| if len(cpu_utils) != len(self.cpus): |
| raise ValueError( |
| 'cpu_utils length ({}) must equal CPU count ({})'.format( |
| len(cpu_utils), len(self.cpus))) |
| |
| if freqs is None: |
| freqs = self.guess_freqs(cpu_utils) |
| if idle_states is None: |
| idle_states = self.guess_idle_states(cpu_utils) |
| |
| cpu_active_time = [] |
| for cpu, node in enumerate(self.cpu_nodes): |
| assert (cpu,) == node.cpus |
| cap = node.active_states[freqs[cpu]].capacity |
| cpu_active_time.append(min(float(cpu_utils[cpu]) / cap, 1.0)) |
| |
| return self._estimate_from_active_time(cpu_active_time, |
| freqs, idle_states, combine=True) |
| |
| def get_optimal_placements(self, capacities): |
| """Find the optimal distribution of work for a set of tasks |
| |
| Find a list of candidates which are estimated to be optimal in terms of |
| power consumption, but that do not result in any CPU becoming |
| over-utilized. |
| |
| If no such candidates exist, i.e. the system being modeled cannot |
| satisfy the workload's throughput requirements, an |
| :class:`EnergyModelCapacityError` is raised. For example, if e was an |
| EnergyModel modeling two CPUs with capacity 1024, this error would be |
| raised by: |
| |
| :: |
| |
| e.get_optimal_placements({"t1": 800, "t2": 800, "t3: "800"}) |
| |
| This estimation assumes an ideal system of selecting OPPs and idle |
| states for CPUs. |
| |
| .. note:: |
| This is a brute force search taking time exponential wrt. the number |
| of tasks. |
| |
| :param capacities: Dict mapping tasks to expected utilization |
| values. These tasks are assumed not to change; they |
| have a single static utilization value. A set of |
| single-phase periodic RT-App tasks is an example of a |
| suitable workload for this model. |
| :returns: List of ``cpu_utils`` items representing distributions of work |
| under optimal task placements, see |
| :ref:`cpu_utils <cpu-utils>`. Multiple task placements |
| that result in the same CPU utilizations are considered |
| equivalent. |
| """ |
| tasks = capacities.keys() |
| |
| num_candidates = len(self.cpus) ** len(tasks) |
| self._log.debug( |
| '%14s - Searching %d configurations for optimal task placement...', |
| 'EnergyModel', num_candidates) |
| |
| candidates = {} |
| excluded = [] |
| for cpus in product(self.cpus, repeat=len(tasks)): |
| placement = {task: cpu for task, cpu in zip(tasks, cpus)} |
| |
| util = [0 for _ in self.cpus] |
| for task, cpu in placement.items(): |
| util[cpu] += capacities[task] |
| util = tuple(util) |
| |
| # Filter out candidate placements that have tasks greater than max |
| # or that we have already determined that we cannot place. |
| if (any(u > self.capacity_scale for u in util) or util in excluded): |
| continue |
| |
| if util not in candidates: |
| freqs, overutilized = self._guess_freqs(util) |
| if overutilized: |
| # This isn't a valid placement |
| excluded.append(util) |
| else: |
| power = self.estimate_from_cpu_util(util, freqs=freqs) |
| candidates[util] = sum(power.values()) |
| |
| if not candidates: |
| # The system can't provide full throughput to this workload. |
| raise EnergyModelCapacityError( |
| "Can't handle workload - total cap = {}".format( |
| sum(capacities.values()))) |
| |
| # Whittle down to those that give the lowest energy estimate |
| min_power = min(p for p in candidates.itervalues()) |
| ret = [u for u, p in candidates.iteritems() if p == min_power] |
| |
| self._log.debug('%14s - Done', 'EnergyModel') |
| return ret |
| |
| @classmethod |
| def _find_core_groups(cls, target): |
| """ |
| Read the core_siblings masks for each CPU from sysfs |
| |
| :param target: Devlib Target object to read masks from |
| :returns: A list of tuples of ints, representing the partition of core |
| siblings |
| """ |
| cpus = range(target.number_of_cpus) |
| |
| topology_base = '/sys/devices/system/cpu/' |
| |
| # We only care about core_siblings, but let's check *_siblings, so we |
| # can throw an error if a CPU's thread_siblings isn't just itself, or if |
| # there's a topology level we don't understand. |
| |
| # Since we might have to read a lot of files, read everything we need in |
| # one go to avoid taking too long. |
| mask_glob = topology_base + 'cpu**/topology/*_siblings' |
| file_values = read_multiple_oneline_files(target, [mask_glob]) |
| |
| regex = re.compile( |
| topology_base + r'cpu([0-9]+)/topology/([a-z]+)_siblings') |
| |
| ret = set() |
| |
| for path, mask_str in file_values.iteritems(): |
| match = regex.match(path) |
| cpu = int(match.groups()[0]) |
| level = match.groups()[1] |
| # mask_to_list returns the values in descending order, so we'll sort |
| # them ascending. This isn't strictly necessary but it's nicer. |
| siblings = tuple(sorted(mask_to_list(int(mask_str, 16)))) |
| |
| if level == 'thread': |
| if siblings != (cpu,): |
| # SMT systems aren't supported |
| raise RuntimeError('CPU{} thread_siblings is {}. ' |
| 'expected {}'.format(cpu, siblings, [cpu])) |
| continue |
| if level != 'core': |
| # The only other levels we should expect to find are 'book' and |
| # 'shelf', which are not used by architectures we support. |
| raise RuntimeError( |
| 'Unrecognised topology level "{}"'.format(level)) |
| |
| ret.add(siblings) |
| |
| # Sort core groups so that the lowest-numbered cores are first |
| # Again, not strictly necessary, just more pleasant. |
| return sorted(ret, key=lambda x: x[0]) |
| |
| @classmethod |
| def from_target(cls, target): |
| """ |
| Create an EnergyModel by reading a target filesystem |
| |
| This uses the sysctl added by EAS pathces to exposes the cap_states and |
| idle_states fields for each sched_group. This feature depends on |
| CONFIG_SCHED_DEBUG, and is not upstream in mainline Linux (as of v4.11), |
| so this method is only tested with Android kernels. |
| |
| The kernel doesn't have an power domain data, so this method assumes |
| that all CPUs are totally independent wrt. idle states - the EnergyModel |
| constructed won't be aware of the topological dependencies for entering |
| "cluster" idle states. |
| |
| Assumes the energy model has two-levels (plus the root) - a level for |
| CPUs and a level for 'clusters'. |
| |
| :param target: Devlib target object to read filesystem from. Must have |
| cpufreq and cpuidle modules enabled. |
| :returns: Constructed EnergyModel object based on the parameters |
| reported by the target. |
| """ |
| if 'cpufreq' not in target.modules: |
| raise TargetError('Requires cpufreq devlib module. Please ensure ' |
| '"cpufreq" is listed in your target/test modules') |
| if 'cpuidle' not in target.modules: |
| raise TargetError('Requires cpuidle devlib module. Please ensure ' |
| '"cpuidle" is listed in your target/test modules') |
| |
| def sge_path(cpu, domain, group, field): |
| f = '/proc/sys/kernel/sched_domain/cpu{}/domain{}/group{}/energy/{}' |
| return f.format(cpu, domain, group, field) |
| |
| # Read all the files we might need in one go, otherwise this will take |
| # ages. |
| sge_globs = [sge_path('**', '**', '**', 'cap_states'), |
| sge_path('**', '**', '**', 'idle_states')] |
| sge_file_values = read_multiple_oneline_files(target, sge_globs) |
| |
| if not sge_file_values: |
| raise TargetError('Energy Model not exposed in sysfs. ' |
| 'Check CONFIG_SCHED_DEBUG is enabled.') |
| |
| # These functions read the cap_states and idle_states vectors for the |
| # first sched_group in the sched_domain for a given CPU at a given |
| # level. That first group will include the given CPU. So |
| # read_active_states(0, 0) will give the CPU-level active_states for |
| # CPU0 and read_active_states(0, 1) will give the "cluster"-level |
| # active_states for the "cluster" that contains CPU0. |
| |
| def read_sge_file(path): |
| try: |
| return sge_file_values[path] |
| except KeyError as e: |
| raise TargetError('No such file: {}'.format(e)) |
| |
| def read_active_states(cpu, domain_level): |
| cap_states_path = sge_path(cpu, domain_level, 0, 'cap_states') |
| cap_states_strs = read_sge_file(cap_states_path).split() |
| |
| # cap_states lists the capacity of each state followed by its power, |
| # in increasing order. The `zip` call does this: |
| # [c0, p0, c1, p1, c2, p2] -> [(c0, p0), (c1, p1), (c2, p2)] |
| cap_states = [ActiveState(capacity=int(c), power=int(p)) |
| for c, p in zip(cap_states_strs[0::2], |
| cap_states_strs[1::2])] |
| freqs = target.cpufreq.list_frequencies(cpu) |
| return OrderedDict(zip(sorted(freqs), cap_states)) |
| |
| def read_idle_states(cpu, domain_level): |
| idle_states_path = sge_path(cpu, domain_level, 0, 'idle_states') |
| idle_states_strs = read_sge_file(idle_states_path).split() |
| |
| # get_states should return the state names in increasing depth order |
| names = [s.name for s in target.cpuidle.get_states(cpu)] |
| # idle_states is a list of power values in increasing order of |
| # idle-depth/decreasing order of power. |
| return OrderedDict(zip(names, [int(p) for p in idle_states_strs])) |
| |
| # Read the CPU-level data from sched_domain level 0 |
| cpus = range(target.number_of_cpus) |
| cpu_nodes = [] |
| for cpu in cpus: |
| node = EnergyModelNode( |
| cpu=cpu, |
| active_states=read_active_states(cpu, 0), |
| idle_states=read_idle_states(cpu, 0)) |
| cpu_nodes.append(node) |
| |
| # Read the "cluster" level data from sched_domain level 1 |
| core_group_nodes = [] |
| for core_group in cls._find_core_groups(target): |
| node=EnergyModelNode( |
| children=[cpu_nodes[c] for c in core_group], |
| active_states=read_active_states(core_group[0], 1), |
| idle_states=read_idle_states(core_group[0], 1)) |
| core_group_nodes.append(node) |
| |
| root = EnergyModelRoot(children=core_group_nodes) |
| |
| # Use cpufreq to figure out the frequency domains |
| freq_domains = [] |
| remaining_cpus = set(cpus) |
| while remaining_cpus: |
| cpu = next(iter(remaining_cpus)) |
| dom = target.cpufreq.get_domain_cpus(cpu) |
| freq_domains.append(dom) |
| remaining_cpus = remaining_cpus.difference(dom) |
| |
| # We don't have a way to read the power domains from sysfs (the kernel |
| # isn't even aware of them) so we'll just have to assume each CPU is its |
| # own power domain and all idle states are independent of each other. |
| cpu_pds = [] |
| for cpu in cpus: |
| names = [s.name for s in target.cpuidle.get_states(cpu)] |
| cpu_pds.append(PowerDomain(cpu=cpu, idle_states=names)) |
| |
| root_pd=PowerDomain(children=cpu_pds, idle_states=[]) |
| |
| return cls(root_node=root, |
| root_power_domain=root_pd, |
| freq_domains=freq_domains) |