kernel/sched/cpufreq_sched.c - kernel/msm.git - Git at Google

 /*
  *  Copyright (C)  2015 Michael Turquette <mturquette@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */

 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/percpu.h>
 #include <linux/irq_work.h>
 #include <linux/delay.h>
 #include <linux/string.h>

 #define CREATE_TRACE_POINTS
 #include <trace/events/cpufreq_sched.h>

 #include "sched.h"

 #define THROTTLE_DOWN_NSEC	50000000 /* 50ms default */
 #define THROTTLE_UP_NSEC	500000 /* 500us default */

 struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
 static bool __read_mostly cpufreq_driver_slow;

 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
 static struct cpufreq_governor cpufreq_gov_sched;
 #endif

 static DEFINE_PER_CPU(unsigned long, enabled);
 DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

 /**
  * gov_data - per-policy data internal to the governor
  * @up_throttle: next throttling period expiry if increasing OPP
  * @down_throttle: next throttling period expiry if decreasing OPP
  * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
  * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
  * @task: worker thread for dvfs transition that may block/sleep
  * @irq_work: callback used to wake up worker thread
  * @requested_freq: last frequency requested by the sched governor
  *
  * struct gov_data is the per-policy cpufreq_sched-specific data structure. A
  * per-policy instance of it is created when the cpufreq_sched governor receives
  * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
  * member of struct cpufreq_policy.
  *
  * Readers of this data must call down_read(policy->rwsem). Writers must
  * call down_write(policy->rwsem).
  */
 struct gov_data {
 	ktime_t up_throttle;
 	ktime_t down_throttle;
 	unsigned int up_throttle_nsec;
 	unsigned int down_throttle_nsec;
 	struct task_struct *task;
 	struct irq_work irq_work;
 	unsigned int requested_freq;
 };

 static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
 					    unsigned int freq)
 {
 	struct gov_data *gd = policy->governor_data;

 	/* avoid race with cpufreq_sched_stop */
 	if (!down_write_trylock(&policy->rwsem))
 		return;

 	__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

 	gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec);
 	gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec);
 	up_write(&policy->rwsem);
 }

 static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
 {
 	ktime_t now = ktime_get();

 	ktime_t throttle = gd->requested_freq < cur_freq ?
 		gd->down_throttle : gd->up_throttle;

 	if (ktime_after(now, throttle))
 		return false;

 	while (1) {
 		int usec_left = ktime_to_ns(ktime_sub(throttle, now));

 		usec_left /= NSEC_PER_USEC;
 		trace_cpufreq_sched_throttled(usec_left);
 		usleep_range(usec_left, usec_left + 100);
 		now = ktime_get();
 		if (ktime_after(now, throttle))
 			return true;
 	}
 }

 /*
  * we pass in struct cpufreq_policy. This is safe because changing out the
  * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
  * which tears down all of the data structures and __cpufreq_governor(policy,
  * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
  * new policy pointer
  */
 static int cpufreq_sched_thread(void *data)
 {
 	struct sched_param param;
 	struct cpufreq_policy *policy;
 	struct gov_data *gd;
 	unsigned int new_request = 0;
 	unsigned int last_request = 0;
 	int ret;

 	policy = (struct cpufreq_policy *) data;
 	gd = policy->governor_data;

 	param.sched_priority = 50;
 	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
 	if (ret) {
 		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
 		do_exit(-EINVAL);
 	} else {
 		pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
 				__func__, gd->task->pid);
 	}

 	do {
 		new_request = gd->requested_freq;
 		if (new_request == last_request) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (kthread_should_stop())
 				break;
 			schedule();
 		} else {
 			/*
 			 * if the frequency thread sleeps while waiting to be
 			 * unthrottled, start over to check for a newer request
 			 */
 			if (finish_last_request(gd, policy->cur))
 				continue;
 			last_request = new_request;
 			cpufreq_sched_try_driver_target(policy, new_request);
 		}
 	} while (!kthread_should_stop());

 	return 0;
 }

 static void cpufreq_sched_irq_work(struct irq_work *irq_work)
 {
 	struct gov_data *gd;

 	gd = container_of(irq_work, struct gov_data, irq_work);
 	if (!gd)
 		return;

 	wake_up_process(gd->task);
 }

 static void update_fdomain_capacity_request(int cpu)
 {
 	unsigned int freq_new, index_new, cpu_tmp;
 	struct cpufreq_policy *policy;
 	struct gov_data *gd;
 	unsigned long capacity = 0;

 	/*
 	 * Avoid grabbing the policy if possible. A test is still
 	 * required after locking the CPU's policy to avoid racing
 	 * with the governor changing.
 	 */
 	if (!per_cpu(enabled, cpu))
 		return;

 	policy = cpufreq_cpu_get(cpu);
 	if (IS_ERR_OR_NULL(policy))
 		return;

 	if (policy->governor != &cpufreq_gov_sched ||
 	    !policy->governor_data)
 		goto out;

 	gd = policy->governor_data;

 	/* find max capacity requested by cpus in this policy */
 	for_each_cpu(cpu_tmp, policy->cpus) {
 		struct sched_capacity_reqs *scr;

 		scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
 		capacity = max(capacity, scr->total);
 	}

 	/* Convert the new maximum capacity request into a cpu frequency */
 	freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
 	if (cpufreq_frequency_table_target(policy, policy->freq_table,
 					   freq_new, CPUFREQ_RELATION_L,
 					   &index_new))
 		goto out;
 	freq_new = policy->freq_table[index_new].frequency;

 	if (freq_new > policy->max)
 		freq_new = policy->max;

 	if (freq_new < policy->min)
 		freq_new = policy->min;

 	trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
 					gd->requested_freq);
 	if (freq_new == gd->requested_freq)
 		goto out;

 	gd->requested_freq = freq_new;

 	/*
 	 * Throttling is not yet supported on platforms with fast cpufreq
 	 * drivers.
 	 */
 	if (cpufreq_driver_slow)
 		irq_work_queue_on(&gd->irq_work, cpu);
 	else
 		cpufreq_sched_try_driver_target(policy, freq_new);

 out:
 	cpufreq_cpu_put(policy);
 }

 void update_cpu_capacity_request(int cpu, bool request)
 {
 	unsigned long new_capacity;
 	struct sched_capacity_reqs *scr;

 	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
 	lockdep_assert_held(&cpu_rq(cpu)->lock);

 	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

 	new_capacity = scr->cfs + scr->rt;
 	new_capacity = new_capacity * capacity_margin
 		/ SCHED_CAPACITY_SCALE;
 	new_capacity += scr->dl;

 	if (new_capacity == scr->total)
 		return;

 	trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

 	scr->total = new_capacity;
 	if (request)
 		update_fdomain_capacity_request(cpu);
 }

 static inline void set_sched_freq(void)
 {
 	static_key_slow_inc(&__sched_freq);
 }

 static inline void clear_sched_freq(void)
 {
 	static_key_slow_dec(&__sched_freq);
 }

 static struct attribute_group sched_attr_group_gov_pol;
 static struct attribute_group *get_sysfs_attr(void)
 {
 	return &sched_attr_group_gov_pol;
 }

 static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
 {
 	struct gov_data *gd;
 	int cpu;
 	int rc;

 	for_each_cpu(cpu, policy->cpus)
 		memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
 		       sizeof(struct sched_capacity_reqs));

 	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
 	if (!gd)
 		return -ENOMEM;

 	gd->up_throttle_nsec = policy->cpuinfo.transition_latency ?
 			    policy->cpuinfo.transition_latency :
 			    THROTTLE_UP_NSEC;
 	gd->down_throttle_nsec = THROTTLE_DOWN_NSEC;
 	pr_debug("%s: throttle threshold = %u [ns]\n",
 		  __func__, gd->up_throttle_nsec);

 	policy->governor_data = gd;

 	rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr());
 	if (rc) {
 		pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc);
 		goto err;
 	}

 	if (cpufreq_driver_is_slow()) {
 		cpufreq_driver_slow = true;
 		gd->task = kthread_create(cpufreq_sched_thread, policy,
 					  "kschedfreq:%d",
 					  cpumask_first(policy->related_cpus));
 		if (IS_ERR_OR_NULL(gd->task)) {
 			pr_err("%s: failed to create kschedfreq thread\n",
 			       __func__);
 			goto err;
 		}
 		get_task_struct(gd->task);
 		kthread_bind_mask(gd->task, policy->related_cpus);
 		wake_up_process(gd->task);
 		init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
 	}

 	set_sched_freq();

 	return 0;

 err:
 	policy->governor_data = NULL;
 	kfree(gd);
 	return -ENOMEM;
 }

 static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
 {
 	struct gov_data *gd = policy->governor_data;

 	clear_sched_freq();
 	if (cpufreq_driver_slow) {
 		kthread_stop(gd->task);
 		put_task_struct(gd->task);
 	}

 	sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());

 	policy->governor_data = NULL;

 	kfree(gd);
 	return 0;
 }

 static int cpufreq_sched_start(struct cpufreq_policy *policy)
 {
 	int cpu;

 	for_each_cpu(cpu, policy->cpus)
 		per_cpu(enabled, cpu) = 1;

 	return 0;
 }

 static void cpufreq_sched_limits(struct cpufreq_policy *policy)
 {
 	unsigned int clamp_freq;
 	struct gov_data *gd = policy->governor_data;;

 	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
 		policy->cpu, policy->min, policy->max,
 		policy->cur);

 	clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);

 	if (policy->cur != clamp_freq)
 		__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
 }

 static int cpufreq_sched_stop(struct cpufreq_policy *policy)
 {
 	int cpu;

 	for_each_cpu(cpu, policy->cpus)
 		per_cpu(enabled, cpu) = 0;

 	return 0;
 }

 static int cpufreq_sched_setup(struct cpufreq_policy *policy,
 			       unsigned int event)
 {
 	switch (event) {
 	case CPUFREQ_GOV_POLICY_INIT:
 		return cpufreq_sched_policy_init(policy);
 	case CPUFREQ_GOV_POLICY_EXIT:
 		return cpufreq_sched_policy_exit(policy);
 	case CPUFREQ_GOV_START:
 		return cpufreq_sched_start(policy);
 	case CPUFREQ_GOV_STOP:
 		return cpufreq_sched_stop(policy);
 	case CPUFREQ_GOV_LIMITS:
 		cpufreq_sched_limits(policy);
 		break;
 	}
 	return 0;
 }

 /* Tunables */
 static ssize_t show_up_throttle_nsec(struct gov_data *gd, char *buf)
 {
 	return sprintf(buf, "%u\n", gd->up_throttle_nsec);
 }

 static ssize_t store_up_throttle_nsec(struct gov_data *gd,
 		const char *buf, size_t count)
 {
 	int ret;
 	long unsigned int val;

 	ret = kstrtoul(buf, 0, &val);
 	if (ret < 0)
 		return ret;
 	gd->up_throttle_nsec = val;
 	return count;
 }

 static ssize_t show_down_throttle_nsec(struct gov_data *gd, char *buf)
 {
 	return sprintf(buf, "%u\n", gd->down_throttle_nsec);
 }

 static ssize_t store_down_throttle_nsec(struct gov_data *gd,
 		const char *buf, size_t count)
 {
 	int ret;
 	long unsigned int val;

 	ret = kstrtoul(buf, 0, &val);
 	if (ret < 0)
 		return ret;
 	gd->down_throttle_nsec = val;
 	return count;
 }

 /*
  * Create show/store routines
  * - sys: One governor instance for complete SYSTEM
  * - pol: One governor instance per struct cpufreq_policy
  */
 #define show_gov_pol_sys(file_name)					\
 static ssize_t show_##file_name##_gov_pol				\
 (struct cpufreq_policy *policy, char *buf)				\
 {									\
 	return show_##file_name(policy->governor_data, buf);		\
 }

 #define store_gov_pol_sys(file_name)					\
 static ssize_t store_##file_name##_gov_pol				\
 (struct cpufreq_policy *policy, const char *buf, size_t count)		\
 {									\
 	return store_##file_name(policy->governor_data, buf, count);	\
 }

 #define gov_pol_attr_rw(_name)						\
 	static struct freq_attr _name##_gov_pol =				\
 	__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)

 #define show_store_gov_pol_sys(file_name)				\
 	show_gov_pol_sys(file_name);						\
 	store_gov_pol_sys(file_name)
 #define tunable_handlers(file_name) \
 	show_gov_pol_sys(file_name); \
 	store_gov_pol_sys(file_name); \
 	gov_pol_attr_rw(file_name)

 tunable_handlers(down_throttle_nsec);
 tunable_handlers(up_throttle_nsec);

 /* Per policy governor instance */
 static struct attribute *sched_attributes_gov_pol[] = {
 	&up_throttle_nsec_gov_pol.attr,
 	&down_throttle_nsec_gov_pol.attr,
 	NULL,
 };

 static struct attribute_group sched_attr_group_gov_pol = {
 	.attrs = sched_attributes_gov_pol,
 	.name = "sched",
 };

 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
 static
 #endif
 struct cpufreq_governor cpufreq_gov_sched = {
 	.name			= "sched",
 	.governor		= cpufreq_sched_setup,
 	.owner			= THIS_MODULE,
 };

 static int __init cpufreq_sched_init(void)
 {
 	int cpu;

 	for_each_cpu(cpu, cpu_possible_mask)
 		per_cpu(enabled, cpu) = 0;
 	return cpufreq_register_governor(&cpufreq_gov_sched);
 }

 /* Try to make this the default governor */
 fs_initcall(cpufreq_sched_init);
	/*
	* Copyright (C) 2015 Michael Turquette <mturquette@linaro.org>
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License version 2 as
	* published by the Free Software Foundation.
	*/

	#include <linux/cpufreq.h>
	#include <linux/module.h>
	#include <linux/kthread.h>
	#include <linux/percpu.h>
	#include <linux/irq_work.h>
	#include <linux/delay.h>
	#include <linux/string.h>

	#define CREATE_TRACE_POINTS
	#include <trace/events/cpufreq_sched.h>

	#include "sched.h"

	#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */
	#define THROTTLE_UP_NSEC 500000 /* 500us default */

	struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE;
	static bool __read_mostly cpufreq_driver_slow;

	#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
	static struct cpufreq_governor cpufreq_gov_sched;
	#endif

	static DEFINE_PER_CPU(unsigned long, enabled);
	DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs);

	/**
	* gov_data - per-policy data internal to the governor
	* @up_throttle: next throttling period expiry if increasing OPP
	* @down_throttle: next throttling period expiry if decreasing OPP
	* @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP
	* @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP
	* @task: worker thread for dvfs transition that may block/sleep
	* @irq_work: callback used to wake up worker thread
	* @requested_freq: last frequency requested by the sched governor
	*
	* struct gov_data is the per-policy cpufreq_sched-specific data structure. A
	* per-policy instance of it is created when the cpufreq_sched governor receives
	* the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data
	* member of struct cpufreq_policy.
	*
	* Readers of this data must call down_read(policy->rwsem). Writers must
	* call down_write(policy->rwsem).
	*/
	struct gov_data {
	ktime_t up_throttle;
	ktime_t down_throttle;
	unsigned int up_throttle_nsec;
	unsigned int down_throttle_nsec;
	struct task_struct *task;
	struct irq_work irq_work;
	unsigned int requested_freq;
	};

	static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy,
	unsigned int freq)
	{
	struct gov_data *gd = policy->governor_data;

	/* avoid race with cpufreq_sched_stop */
	if (!down_write_trylock(&policy->rwsem))
	return;

	__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);

	gd->up_throttle = ktime_add_ns(ktime_get(), gd->up_throttle_nsec);
	gd->down_throttle = ktime_add_ns(ktime_get(), gd->down_throttle_nsec);
	up_write(&policy->rwsem);
	}

	static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq)
	{
	ktime_t now = ktime_get();

	ktime_t throttle = gd->requested_freq < cur_freq ?
	gd->down_throttle : gd->up_throttle;

	if (ktime_after(now, throttle))
	return false;

	while (1) {
	int usec_left = ktime_to_ns(ktime_sub(throttle, now));

	usec_left /= NSEC_PER_USEC;
	trace_cpufreq_sched_throttled(usec_left);
	usleep_range(usec_left, usec_left + 100);
	now = ktime_get();
	if (ktime_after(now, throttle))
	return true;
	}
	}

	/*
	* we pass in struct cpufreq_policy. This is safe because changing out the
	* policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP),
	* which tears down all of the data structures and __cpufreq_governor(policy,
	* CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the
	* new policy pointer
	*/
	static int cpufreq_sched_thread(void *data)
	{
	struct sched_param param;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned int new_request = 0;
	unsigned int last_request = 0;
	int ret;

	policy = (struct cpufreq_policy *) data;
	gd = policy->governor_data;

	param.sched_priority = 50;
	ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, &param);
	if (ret) {
	pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
	do_exit(-EINVAL);
	} else {
	pr_debug("%s: kthread (%d) set to SCHED_FIFO\n",
	__func__, gd->task->pid);
	}

	do {
	new_request = gd->requested_freq;
	if (new_request == last_request) {
	set_current_state(TASK_INTERRUPTIBLE);
	if (kthread_should_stop())
	break;
	schedule();
	} else {
	/*
	* if the frequency thread sleeps while waiting to be
	* unthrottled, start over to check for a newer request
	*/
	if (finish_last_request(gd, policy->cur))
	continue;
	last_request = new_request;
	cpufreq_sched_try_driver_target(policy, new_request);
	}
	} while (!kthread_should_stop());

	return 0;
	}

	static void cpufreq_sched_irq_work(struct irq_work *irq_work)
	{
	struct gov_data *gd;

	gd = container_of(irq_work, struct gov_data, irq_work);
	if (!gd)
	return;

	wake_up_process(gd->task);
	}

	static void update_fdomain_capacity_request(int cpu)
	{
	unsigned int freq_new, index_new, cpu_tmp;
	struct cpufreq_policy *policy;
	struct gov_data *gd;
	unsigned long capacity = 0;

	/*
	* Avoid grabbing the policy if possible. A test is still
	* required after locking the CPU's policy to avoid racing
	* with the governor changing.
	*/
	if (!per_cpu(enabled, cpu))
	return;

	policy = cpufreq_cpu_get(cpu);
	if (IS_ERR_OR_NULL(policy))
	return;

	if (policy->governor != &cpufreq_gov_sched \|\|
	!policy->governor_data)
	goto out;

	gd = policy->governor_data;

	/* find max capacity requested by cpus in this policy */
	for_each_cpu(cpu_tmp, policy->cpus) {
	struct sched_capacity_reqs *scr;

	scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp);
	capacity = max(capacity, scr->total);
	}

	/* Convert the new maximum capacity request into a cpu frequency */
	freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT;
	if (cpufreq_frequency_table_target(policy, policy->freq_table,
	freq_new, CPUFREQ_RELATION_L,
	&index_new))
	goto out;
	freq_new = policy->freq_table[index_new].frequency;

	if (freq_new > policy->max)
	freq_new = policy->max;

	if (freq_new < policy->min)
	freq_new = policy->min;

	trace_cpufreq_sched_request_opp(cpu, capacity, freq_new,
	gd->requested_freq);
	if (freq_new == gd->requested_freq)
	goto out;

	gd->requested_freq = freq_new;

	/*
	* Throttling is not yet supported on platforms with fast cpufreq
	* drivers.
	*/
	if (cpufreq_driver_slow)
	irq_work_queue_on(&gd->irq_work, cpu);
	else
	cpufreq_sched_try_driver_target(policy, freq_new);

	out:
	cpufreq_cpu_put(policy);
	}

	void update_cpu_capacity_request(int cpu, bool request)
	{
	unsigned long new_capacity;
	struct sched_capacity_reqs *scr;

	/* The rq lock serializes access to the CPU's sched_capacity_reqs. */
	lockdep_assert_held(&cpu_rq(cpu)->lock);

	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);

	new_capacity = scr->cfs + scr->rt;
	new_capacity = new_capacity * capacity_margin
	/ SCHED_CAPACITY_SCALE;
	new_capacity += scr->dl;

	if (new_capacity == scr->total)
	return;

	trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity);

	scr->total = new_capacity;
	if (request)
	update_fdomain_capacity_request(cpu);
	}

	static inline void set_sched_freq(void)
	{
	static_key_slow_inc(&__sched_freq);
	}

	static inline void clear_sched_freq(void)
	{
	static_key_slow_dec(&__sched_freq);
	}

	static struct attribute_group sched_attr_group_gov_pol;
	static struct attribute_group *get_sysfs_attr(void)
	{
	return &sched_attr_group_gov_pol;
	}

	static int cpufreq_sched_policy_init(struct cpufreq_policy *policy)
	{
	struct gov_data *gd;
	int cpu;
	int rc;

	for_each_cpu(cpu, policy->cpus)
	memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0,
	sizeof(struct sched_capacity_reqs));

	gd = kzalloc(sizeof(*gd), GFP_KERNEL);
	if (!gd)
	return -ENOMEM;

	gd->up_throttle_nsec = policy->cpuinfo.transition_latency ?
	policy->cpuinfo.transition_latency :
	THROTTLE_UP_NSEC;
	gd->down_throttle_nsec = THROTTLE_DOWN_NSEC;
	pr_debug("%s: throttle threshold = %u [ns]\n",
	__func__, gd->up_throttle_nsec);

	policy->governor_data = gd;

	rc = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr());
	if (rc) {
	pr_err("%s: couldn't create sysfs attributes: %d\n", __func__, rc);
	goto err;
	}

	if (cpufreq_driver_is_slow()) {
	cpufreq_driver_slow = true;
	gd->task = kthread_create(cpufreq_sched_thread, policy,
	"kschedfreq:%d",
	cpumask_first(policy->related_cpus));
	if (IS_ERR_OR_NULL(gd->task)) {
	pr_err("%s: failed to create kschedfreq thread\n",
	__func__);
	goto err;
	}
	get_task_struct(gd->task);
	kthread_bind_mask(gd->task, policy->related_cpus);
	wake_up_process(gd->task);
	init_irq_work(&gd->irq_work, cpufreq_sched_irq_work);
	}

	set_sched_freq();

	return 0;

	err:
	policy->governor_data = NULL;
	kfree(gd);
	return -ENOMEM;
	}

	static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy)
	{
	struct gov_data *gd = policy->governor_data;

	clear_sched_freq();
	if (cpufreq_driver_slow) {
	kthread_stop(gd->task);
	put_task_struct(gd->task);
	}

	sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr());

	policy->governor_data = NULL;

	kfree(gd);
	return 0;
	}

	static int cpufreq_sched_start(struct cpufreq_policy *policy)
	{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
	per_cpu(enabled, cpu) = 1;

	return 0;
	}

	static void cpufreq_sched_limits(struct cpufreq_policy *policy)
	{
	unsigned int clamp_freq;
	struct gov_data *gd = policy->governor_data;;

	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
	policy->cpu, policy->min, policy->max,
	policy->cur);

	clamp_freq = clamp(gd->requested_freq, policy->min, policy->max);

	if (policy->cur != clamp_freq)
	__cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L);
	}

	static int cpufreq_sched_stop(struct cpufreq_policy *policy)
	{
	int cpu;

	for_each_cpu(cpu, policy->cpus)
	per_cpu(enabled, cpu) = 0;

	return 0;
	}

	static int cpufreq_sched_setup(struct cpufreq_policy *policy,
	unsigned int event)
	{
	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
	return cpufreq_sched_policy_init(policy);
	case CPUFREQ_GOV_POLICY_EXIT:
	return cpufreq_sched_policy_exit(policy);
	case CPUFREQ_GOV_START:
	return cpufreq_sched_start(policy);
	case CPUFREQ_GOV_STOP:
	return cpufreq_sched_stop(policy);
	case CPUFREQ_GOV_LIMITS:
	cpufreq_sched_limits(policy);
	break;
	}
	return 0;
	}

	/* Tunables */
	static ssize_t show_up_throttle_nsec(struct gov_data gd, char buf)
	{
	return sprintf(buf, "%u\n", gd->up_throttle_nsec);
	}

	static ssize_t store_up_throttle_nsec(struct gov_data *gd,
	const char *buf, size_t count)
	{
	int ret;
	long unsigned int val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
	return ret;
	gd->up_throttle_nsec = val;
	return count;
	}

	static ssize_t show_down_throttle_nsec(struct gov_data gd, char buf)
	{
	return sprintf(buf, "%u\n", gd->down_throttle_nsec);
	}

	static ssize_t store_down_throttle_nsec(struct gov_data *gd,
	const char *buf, size_t count)
	{
	int ret;
	long unsigned int val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
	return ret;
	gd->down_throttle_nsec = val;
	return count;
	}

	/*
	* Create show/store routines
	* - sys: One governor instance for complete SYSTEM
	* - pol: One governor instance per struct cpufreq_policy
	*/
	#define show_gov_pol_sys(file_name) \
	static ssize_t show_##file_name##_gov_pol \
	(struct cpufreq_policy policy, char buf) \
	{ \
	return show_##file_name(policy->governor_data, buf); \
	}

	#define store_gov_pol_sys(file_name) \
	static ssize_t store_##file_name##_gov_pol \
	(struct cpufreq_policy policy, const char buf, size_t count) \
	{ \
	return store_##file_name(policy->governor_data, buf, count); \
	}

	#define gov_pol_attr_rw(_name) \
	static struct freq_attr _name##_gov_pol = \
	__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)

	#define show_store_gov_pol_sys(file_name) \
	show_gov_pol_sys(file_name); \
	store_gov_pol_sys(file_name)
	#define tunable_handlers(file_name) \
	show_gov_pol_sys(file_name); \
	store_gov_pol_sys(file_name); \
	gov_pol_attr_rw(file_name)

	tunable_handlers(down_throttle_nsec);
	tunable_handlers(up_throttle_nsec);

	/* Per policy governor instance */
	static struct attribute *sched_attributes_gov_pol[] = {
	&up_throttle_nsec_gov_pol.attr,
	&down_throttle_nsec_gov_pol.attr,
	NULL,
	};

	static struct attribute_group sched_attr_group_gov_pol = {
	.attrs = sched_attributes_gov_pol,
	.name = "sched",
	};

	#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED
	static
	#endif
	struct cpufreq_governor cpufreq_gov_sched = {
	.name = "sched",
	.governor = cpufreq_sched_setup,
	.owner = THIS_MODULE,
	};

	static int __init cpufreq_sched_init(void)
	{
	int cpu;

	for_each_cpu(cpu, cpu_possible_mask)
	per_cpu(enabled, cpu) = 0;
	return cpufreq_register_governor(&cpufreq_gov_sched);
	}

	/* Try to make this the default governor */
	fs_initcall(cpufreq_sched_init);