From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Sat, 26 Sep 2015 18:19:54 +0100
Subject: ANDROID: sched: Update max cpu capacity in case of max frequency
 constraints

Wakeup balancing uses cpu capacity awareness and needs to know the
system-wide maximum cpu capacity.

Patch "sched: Store system-wide maximum cpu capacity in root domain"
finds the system-wide maximum cpu capacity during scheduler domain
hierarchy setup. This is sufficient as long as maximum frequency
invariance is not enabled.

If it is enabled, the system-wide maximum cpu capacity can change
between scheduler domain hierarchy setups due to frequency capping.
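
For example (illustrative numbers, using the scaling applied in the
update_cpu_capacity() hunk below, with SCHED_CAPACITY_SHIFT = 10): a
cpu with original capacity 1024 whose maximum frequency is capped to
75% of fmax is left with

  capacity = (1024 * 768) >> SCHED_CAPACITY_SHIFT = 768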

The cpu capacity is changed in update_cpu_capacity(), which is called
during load balance on the lowest scheduler domain hierarchy level.
Knowing whether a capacity change on one cpu also affects the
system-wide maximum cpu capacity would normally require iterating over
all cpus, which is far too costly. That's why this patch follows a
different approach.

The unsigned long max_cpu_capacity value in struct root_domain is
replaced with a struct max_cpu_capacity, containing val (the maximum
cpu capacity) and cpu (the index of the cpu providing that capacity).

Changes to the system-wide maximum cpu capacity and the cpu index are
made if:

1 System-wide maximum cpu capacity < cpu capacity
2 System-wide maximum cpu capacity > cpu capacity and cpu index == cpu

There are no changes to the system-wide maximum cpu capacity in all
other cases (see the example below).
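
As an illustration (capacity values made up), starting from
max_cpu_capacity = {val = 1024, cpu = 4}:

  update_cpu_capacity() on cpu1, capacity  512: no change (neither
                                                case 1 nor 2 holds)
  update_cpu_capacity() on cpu4, capacity  760: case 2 -> {val =  760, cpu = 4}
  update_cpu_capacity() on cpu5, capacity 1024: case 1 -> {val = 1024, cpu = 5}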

Atomic read and write access to the pair (max_cpu_capacity.val,
max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock.

The access to max_cpu_capacity.val in task_fits_max() is still
performed without taking the max_cpu_capacity.lock.
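
This is safe because the reader needs only a single word-sized load of
max_cpu_capacity.val. As a rough sketch of such a lockless reader
(illustrative only: task_fits_max() is not part of this diff, and its
real fitting condition is not shown here):

  static inline bool task_fits_max(struct task_struct *p, int cpu)
  {
          /*
           * Plain load, no mcc->lock taken; a briefly stale value at
           * worst causes a suboptimal placement decision.
           */
          unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity.val;

          if (capacity_orig_of(cpu) == max_capacity)
                  return true;

          return task_util(p) <= capacity_orig_of(cpu); /* illustrative */
  }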

The code to set max cpu capacity in build_sched_domains() has been
removed because the whole functionality is now provided by
update_cpu_capacity() instead.

This approach can introduce transient errors, e.g. in case the cpu
currently providing the maximum cpu capacity has its capacity lowered
due to frequency capping and calls update_cpu_capacity() before any
cpu which could now provide the maximum. The stale value is corrected
by the next update_cpu_capacity() call on such a cpu.

Change-Id: I5063befab088fbf49e5d5e484ce0c6ee6165283a
Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
(- Fixed cherry-pick issues, and conflict with a0fe2cf086ae "sched/fair:
   Tune down misfit NOHZ kicks" which makes use of max_cpu_capacity
 - Squashed "sched/fair: remove printk while schedule is in progress"
   fix from Caesar Wang <wxt@rock-chips.com>)
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
 kernel/sched/fair.c     | 34 ++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h    | 10 +++++++++-
 kernel/sched/topology.c | 15 +++------------
 3 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 00a5292c4508..eeaa334f8832 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6203,7 +6203,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 		return 0;
 
 	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
-	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
 
 	/* Minimum capacity is close to max, no need to abort wake_affine */
 	if (max_cap - min_cap < max_cap >> 3)
@@ -7775,16 +7775,46 @@ static unsigned long scale_rt_capacity(int cpu, unsigned long max)
 	return scale_irq_capacity(free, irq, max);
 }
 
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc) {
+	raw_spin_lock_init(&mcc->lock);
+	mcc->val = 0;
+	mcc->cpu = -1;
+}
+
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long capacity = arch_scale_cpu_capacity(cpu);
 	struct sched_group *sdg = sd->groups;
+	struct max_cpu_capacity *mcc;
+	unsigned long max_capacity;
+	int max_cap_cpu;
+	unsigned long flags;
 
 	cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
 	capacity *= arch_scale_max_freq_capacity(sd, cpu);
 	capacity >>= SCHED_CAPACITY_SHIFT;
 
+	mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+	raw_spin_lock_irqsave(&mcc->lock, flags);
+	max_capacity = mcc->val;
+	max_cap_cpu = mcc->cpu;
+
+	if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+	    (max_capacity < capacity)) {
+		mcc->val = capacity;
+		mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+		raw_spin_unlock_irqrestore(&mcc->lock, flags);
+		printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
+				cpu, capacity);
+		goto skip_unlock;
+#endif
+	}
+	raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
 	capacity = scale_rt_capacity(cpu, capacity);
 
 	if (!capacity)
@@ -7889,7 +7919,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
 {
 	return rq->misfit_task_load &&
-		(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+		(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity.val ||
 		 check_cpu_capacity(rq, sd));
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 164dabe58858..fd7523ec864d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -717,6 +717,12 @@ struct perf_domain {
 	struct rcu_head rcu;
 };
 
+struct max_cpu_capacity {
+	raw_spinlock_t lock;
+	unsigned long val;
+	int cpu;
+};
+
 /* Scheduling group status flags */
 #define SG_OVERLOAD		0x1 /* More than one runnable task on a CPU. */
 #define SG_OVERUTILIZED		0x2 /* One or more CPUs are over-utilized. */
@@ -775,7 +781,8 @@ struct root_domain {
 	cpumask_var_t		rto_mask;
 	struct cpupri		cpupri;
 
-	unsigned long		max_cpu_capacity;
+	/* Maximum cpu capacity in the system. */
+	struct max_cpu_capacity	max_cpu_capacity;
 
 	/*
 	 * NULL-terminated list of performance domains intersecting with the
@@ -785,6 +792,7 @@ struct root_domain {
 };
 
 extern void init_defrootdomain(void);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
 extern void sched_get_rd(struct root_domain *rd);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 49b835f1305f..4a6a2c0f60c8 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -510,6 +510,9 @@ static int init_rootdomain(struct root_domain *rd)
 
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_cpudl;
+
+	init_max_cpu_capacity(&rd->max_cpu_capacity);
+
 	return 0;
 
 free_cpudl:
@@ -1951,7 +1954,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 	enum s_alloc alloc_state = sa_none;
 	struct sched_domain *sd;
 	struct s_data d;
-	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
 	struct sched_domain_topology_level *tl_asym;
 	bool has_asym = false;
@@ -2017,13 +2019,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
-		rq = cpu_rq(i);
 		sd = *per_cpu_ptr(d.sd, i);
-
-		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
-		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
-			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
-
 		cpu_attach_domain(sd, d.rd, i);
 	}
 	rcu_read_unlock();
@@ -2031,11 +2027,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 	if (has_asym)
 		static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
 
-	if (rq && sched_debug_enabled) {
-		pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
-			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
-	}
-
 	ret = 0;
 error:
 	__free_domain_allocs(&d, alloc_state, cpu_map);