From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Sat, 26 Sep 2015 18:19:54 +0100
Subject: ANDROID: sched: Update max cpu capacity in case of max frequency
constraints
Wakeup balancing uses cpu capacity awareness and needs to know the
system-wide maximum cpu capacity.
Patch "sched: Store system-wide maximum cpu capacity in root domain"
finds the system-wide maximum cpu capacity during scheduler domain
hierarchy setup. This is sufficient as long as maximum frequency
invariance is not enabled.
If it is enabled, the system-wide maximum cpu capacity can change
between scheduler domain hierarchy setups due to frequency capping.
The cpu capacity is changed in update_cpu_capacity(), which is called
during load balance on the lowest scheduler domain hierarchy level. To
know whether a change in the cpu capacity of a certain cpu also affects
the system-wide maximum cpu capacity, it would normally be necessary to
iterate over all cpus. This would be way too costly. That's why this
patch follows a different approach.
The unsigned long max_cpu_capacity value in struct root_domain is
replaced with a struct max_cpu_capacity, containing val (the maximum
cpu capacity) and cpu (the index of the cpu providing that maximum
capacity).
Changes to the system-wide maximum cpu capacity and the cpu index are
made if:
1 System-wide maximum cpu capacity < cpu capacity
2 System-wide maximum cpu capacity > cpu capacity and cpu index == cpu
In all other cases the system-wide maximum cpu capacity is left
unchanged; a condensed sketch of this rule follows below.
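Condensed into pseudo-C, the rule applied on every
update_cpu_capacity() call looks like this (a sketch of the logic in
the fair.c hunk below; mcc points at the root domain's
max_cpu_capacity and capacity is the cpu's current capacity):

	if ((mcc->val > capacity && mcc->cpu == cpu) ||
	    (mcc->val < capacity)) {
		mcc->val = capacity;	/* new system-wide maximum */
		mcc->cpu = cpu;		/* cpu now providing it */
	}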
Atomic read and write access to the pair (max_cpu_capacity.val,
max_cpu_capacity.cpu) is enforced by max_cpu_capacity.lock.
The access to max_cpu_capacity.val in task_fits_max() is still performed
without taking the max_cpu_capacity.lock.
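Readers like wake_cap() and check_misfit_status() follow the same
pattern; a minimal sketch of the tolerated lockless read (the writer
side in update_cpu_capacity() takes mcc->lock):

	/* Plain load, no mcc->lock taken; a momentarily stale value
	 * is acceptable for task placement decisions. */
	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;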
The code to set the max cpu capacity in build_sched_domains() has been
removed, since this functionality is now entirely provided by
update_cpu_capacity().
This approach can introduce errors temporarily, e.g. in case the cpu
currently providing the max cpu capacity has its cpu capacity lowered
due to frequency capping and calls update_cpu_capacity() before any
cpu which might provide the max cpu capacity now.
Change-Id: I5063befab088fbf49e5d5e484ce0c6ee6165283a
Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
(- Fixed cherry-pick issues and a conflict with commit a0fe2cf086ae
("sched/fair: Tune down misfit NOHZ kicks"), which makes use of
max_cpu_capacity
- Squashed in the "sched/fair: remove printk while schedule is in
progress" fix from Caesar Wang <wxt@rock-chips.com>)
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
kernel/sched/fair.c | 34 ++++++++++++++++++++++++++++++++--
kernel/sched/sched.h | 10 +++++++++-
kernel/sched/topology.c | 15 +++------------
3 files changed, 44 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 00a5292c4508..eeaa334f8832 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6203,7 +6203,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
return 0;
min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
- max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+ max_cap = cpu_rq(cpu)->rd->max_cpu_capacity.val;
/* Minimum capacity is close to max, no need to abort wake_affine */
if (max_cap - min_cap < max_cap >> 3)
@@ -7775,16 +7775,46 @@ static unsigned long scale_rt_capacity(int cpu, unsigned long max)
return scale_irq_capacity(free, irq, max);
}
+void init_max_cpu_capacity(struct max_cpu_capacity *mcc) {
+ raw_spin_lock_init(&mcc->lock);
+ mcc->val = 0;
+ mcc->cpu = -1;
+}
+
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(cpu);
struct sched_group *sdg = sd->groups;
+ struct max_cpu_capacity *mcc;
+ unsigned long max_capacity;
+ int max_cap_cpu;
+ unsigned long flags;
cpu_rq(cpu)->cpu_capacity_orig = capacity;
capacity *= arch_scale_max_freq_capacity(sd, cpu);
capacity >>= SCHED_CAPACITY_SHIFT;
+ mcc = &cpu_rq(cpu)->rd->max_cpu_capacity;
+
+ raw_spin_lock_irqsave(&mcc->lock, flags);
+ max_capacity = mcc->val;
+ max_cap_cpu = mcc->cpu;
+
+ if ((max_capacity > capacity && max_cap_cpu == cpu) ||
+ (max_capacity < capacity)) {
+ mcc->val = capacity;
+ mcc->cpu = cpu;
+#ifdef CONFIG_SCHED_DEBUG
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+ printk_deferred(KERN_INFO "CPU%d: update max cpu_capacity %lu\n",
+ cpu, capacity);
+ goto skip_unlock;
+#endif
+ }
+ raw_spin_unlock_irqrestore(&mcc->lock, flags);
+
+skip_unlock: __attribute__ ((unused));
capacity = scale_rt_capacity(cpu, capacity);
if (!capacity)
@@ -7889,7 +7919,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
{
return rq->misfit_task_load &&
- (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+ (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity.val ||
check_cpu_capacity(rq, sd));
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 164dabe58858..fd7523ec864d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -717,6 +717,12 @@ struct perf_domain {
struct rcu_head rcu;
};
+struct max_cpu_capacity {
+ raw_spinlock_t lock;
+ unsigned long val;
+ int cpu;
+};
+
/* Scheduling group status flags */
#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
@@ -775,7 +781,8 @@ struct root_domain {
cpumask_var_t rto_mask;
struct cpupri cpupri;
- unsigned long max_cpu_capacity;
+ /* Maximum cpu capacity in the system. */
+ struct max_cpu_capacity max_cpu_capacity;
/*
* NULL-terminated list of performance domains intersecting with the
@@ -785,6 +792,7 @@ struct root_domain {
};
extern void init_defrootdomain(void);
+extern void init_max_cpu_capacity(struct max_cpu_capacity *mcc);
extern int sched_init_domains(const struct cpumask *cpu_map);
extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
extern void sched_get_rd(struct root_domain *rd);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 49b835f1305f..4a6a2c0f60c8 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -510,6 +510,9 @@ static int init_rootdomain(struct root_domain *rd)
if (cpupri_init(&rd->cpupri) != 0)
goto free_cpudl;
+
+ init_max_cpu_capacity(&rd->max_cpu_capacity);
+
return 0;
free_cpudl:
@@ -1951,7 +1954,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
enum s_alloc alloc_state = sa_none;
struct sched_domain *sd;
struct s_data d;
- struct rq *rq = NULL;
int i, ret = -ENOMEM;
struct sched_domain_topology_level *tl_asym;
bool has_asym = false;
@@ -2017,13 +2019,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/* Attach the domains */
rcu_read_lock();
for_each_cpu(i, cpu_map) {
- rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
-
- /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
- if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
- WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
-
cpu_attach_domain(sd, d.rd, i);
}
rcu_read_unlock();
@@ -2031,11 +2027,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_asym)
static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
- if (rq && sched_debug_enabled) {
- pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
- cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
- }
-
ret = 0;
error:
__free_domain_allocs(&d, alloc_state, cpu_map);