| /* |
| * drivers/misc/cpuload.c |
| * |
| * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved. |
| * |
| * This software is licensed under the terms of the GNU General Public |
| * License version 2, as published by the Free Software Foundation, and |
| * may be copied, distributed, and modified under those terms. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| */ |
| |
| #include <linux/cpu.h> |
| #include <linux/cpumask.h> |
| #include <linux/cpufreq.h> |
| #include <linux/module.h> |
| #include <linux/mutex.h> |
| #include <linux/sched.h> |
| #include <linux/tick.h> |
| #include <linux/timer.h> |
| #include <linux/workqueue.h> |
| #include <linux/kthread.h> |
| |
| #include <asm/cputime.h> |
| |
| static atomic_t active_count = ATOMIC_INIT(0); |
| static unsigned int enabled; |
| |
| static void cpuloadmon_enable(unsigned int state); |
| |
| struct cpuloadmon_cpuinfo { |
| /* cpu load */ |
| struct timer_list cpu_timer; |
| int timer_idlecancel; |
| u64 time_in_idle; |
| u64 time_in_iowait; |
| u64 idle_exit_time; |
| u64 timer_run_time; |
| int idling; |
| int monitor_enabled; |
| int cpu_load; |
| |
| /* runnable threads */ |
| u64 previous_integral; |
| unsigned int avg; |
| bool integral_sampled; |
| u64 prev_timestamp; |
| }; |
| |
| static DEFINE_PER_CPU(struct cpuloadmon_cpuinfo, cpuinfo); |
| |
/* Treat time spent in iowait as busy time when set */
| static unsigned long io_is_busy; |
| |
| /* |
| * The sample rate of the timer used to increase frequency |
| */ |
| #define DEFAULT_TIMER_RATE 20000; |
| static unsigned long timer_rate; |
| |
| /* nr runnable threads */ |
| #define NR_FSHIFT_EXP 3 |
| #define NR_FSHIFT (1 << NR_FSHIFT_EXP) |
| #define EXP 1497 /* 20 msec window */ |
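/*
 * Fixed-point parameters for the runnable-thread figures: the scheduler
 * integral sampled below is assumed to be NR_FSHIFT-scaled, and EXP is
 * kept as the decay constant for a 20 msec averaging window.
 */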
| |
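/*
 * get_cpu_iowait_time_us() returns -1ULL when per-cpu iowait accounting
 * is unavailable; report that as zero iowait.
 */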
| static inline cputime64_t get_cpu_iowait_time( |
| unsigned int cpu, cputime64_t *wall) |
| { |
| u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); |
| |
| if (iowait_time == -1ULL) |
| return 0; |
| |
| return iowait_time; |
| } |
| |
| static void cpuloadmon_timer(unsigned long data) |
| { |
| unsigned int delta_idle; |
| unsigned int delta_iowait; |
| unsigned int delta_time; |
| u64 time_in_idle; |
| u64 time_in_iowait; |
| u64 idle_exit_time; |
| struct cpuloadmon_cpuinfo *pcpu = |
| &per_cpu(cpuinfo, data); |
| u64 now_idle; |
| u64 now_iowait; |
| u64 integral, old_integral, delta_integral, delta_time_nr, cur_time; |
| |
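	/* matches the smp_wmb()s in the idle callbacks and the enable path */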
| smp_rmb(); |
| |
| if (!pcpu->monitor_enabled) |
| goto exit; |
| |
| /* |
| * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time, |
| * this lets idle exit know the current idle time sample has |
| * been processed, and idle exit can generate a new sample and |
| * re-arm the timer. This prevents a concurrent idle |
| * exit on that CPU from writing a new set of info at the same time |
| * the timer function runs (the timer function can't use that info |
| * until more time passes). |
| */ |
| time_in_idle = pcpu->time_in_idle; |
| time_in_iowait = pcpu->time_in_iowait; |
| idle_exit_time = pcpu->idle_exit_time; |
| now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time); |
| now_iowait = get_cpu_iowait_time(data, NULL); |
| smp_wmb(); |
| |
| /* If we raced with cancelling a timer, skip. */ |
| if (!idle_exit_time) |
| goto exit; |
| |
| delta_idle = (unsigned int)(now_idle - time_in_idle); |
| delta_iowait = (unsigned int)(now_iowait - time_in_iowait); |
| delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time); |
| |
| /* |
| * If timer ran less than 1ms after short-term sample started, retry. |
| */ |
| if (delta_time < 1000) |
| goto rearm; |
| |
| if (!io_is_busy) |
| delta_idle += delta_iowait; |
| |
| if (delta_idle > delta_time) |
| pcpu->cpu_load = 0; |
| else |
| pcpu->cpu_load = 100 * (delta_time - delta_idle) / delta_time; |
| |
	/*
	 * Average number of runnable threads: take the delta of the
	 * scheduler's time-integral of nr_running over the sample
	 * window and divide by the window length.
	 */
| integral = nr_running_integral(data); |
| old_integral = pcpu->previous_integral; |
| pcpu->previous_integral = integral; |
| cur_time = ktime_to_ns(ktime_get()); |
| delta_time_nr = cur_time - pcpu->prev_timestamp; |
| pcpu->prev_timestamp = cur_time; |
| |
	if (!pcpu->integral_sampled) {
		pcpu->integral_sampled = true;
		/*
		 * First sample only initializes previous_integral;
		 * skip the avg calculation.
		 */
	} else {
		if (integral < old_integral) {
			/* Overflow: account for the wrap past ULLONG_MAX */
			delta_integral =
				(ULLONG_MAX - old_integral) + integral + 1;
		} else {
			delta_integral = integral - old_integral;
		}

		/*
		 * Average for the previous sample window.  Note that
		 * do_div() takes a 32-bit divisor; the window length in
		 * ns fits as long as timer_rate stays below ~4 seconds.
		 */
		do_div(delta_integral, (u32)delta_time_nr);
		pcpu->avg = delta_integral;
	}
| |
| rearm: |
| if (!timer_pending(&pcpu->cpu_timer)) { |
| if (pcpu->idling) |
| goto exit; |
| |
| pcpu->time_in_idle = get_cpu_idle_time_us( |
| data, &pcpu->idle_exit_time); |
| pcpu->time_in_iowait = get_cpu_iowait_time( |
| data, NULL); |
| |
| mod_timer(&pcpu->cpu_timer, |
| jiffies + usecs_to_jiffies(timer_rate)); |
| } |
| |
| exit: |
| return; |
| } |
| |
| static void cpuloadmon_idle_start(void) |
| { |
| struct cpuloadmon_cpuinfo *pcpu = |
| &per_cpu(cpuinfo, smp_processor_id()); |
| int pending; |
| |
| if (!pcpu->monitor_enabled) |
| return; |
| |
| pcpu->idling = 1; |
| smp_wmb(); |
| pending = timer_pending(&pcpu->cpu_timer); |
| |
| if (pending && pcpu->timer_idlecancel) { |
| del_timer(&pcpu->cpu_timer); |
| /* |
| * Ensure last timer run time is after current idle |
| * sample start time, so next idle exit will always |
| * start a new idle sampling period. |
| */ |
| pcpu->idle_exit_time = 0; |
| pcpu->timer_idlecancel = 0; |
| } |
| } |
| |
| static void cpuloadmon_idle_end(void) |
| { |
| struct cpuloadmon_cpuinfo *pcpu = |
| &per_cpu(cpuinfo, smp_processor_id()); |
| |
| if (!pcpu->monitor_enabled) |
| return; |
| |
| pcpu->idling = 0; |
| smp_wmb(); |
| |
| /* |
| * Arm the timer for 1-2 ticks later if not already, and if the timer |
| * function has already processed the previous load sampling |
| * interval. (If the timer is not pending but has not processed |
| * the previous interval, it is probably racing with us on another |
| * CPU. Let it compute load based on the previous sample and then |
| * re-arm the timer for another interval when it's done, rather |
| * than updating the interval start time to be "now", which doesn't |
| * give the timer function enough time to make a decision on this |
| * run.) |
| */ |
| if (timer_pending(&pcpu->cpu_timer) == 0 && |
| pcpu->timer_run_time >= pcpu->idle_exit_time && |
| pcpu->monitor_enabled) { |
| pcpu->time_in_idle = |
| get_cpu_idle_time_us(smp_processor_id(), |
| &pcpu->idle_exit_time); |
| pcpu->time_in_iowait = |
| get_cpu_iowait_time(smp_processor_id(), |
| NULL); |
| pcpu->timer_idlecancel = 0; |
| mod_timer(&pcpu->cpu_timer, |
| jiffies + usecs_to_jiffies(timer_rate)); |
| } |
| } |
| |
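/*
 * Boilerplate: generate sysfs show/store handlers and a global
 * attribute for an unsigned long tunable.
 */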
| #define DECL_CPULOAD_ATTR(name) \ |
| static ssize_t show_##name(struct kobject *kobj, \ |
| struct attribute *attr, char *buf) \ |
| { \ |
| return sprintf(buf, "%lu\n", name); \ |
| } \ |
| \ |
| static ssize_t store_##name(struct kobject *kobj,\ |
| struct attribute *attr, const char *buf, size_t count) \ |
| { \ |
| int ret; \ |
| unsigned long val; \ |
| \ |
| ret = kstrtoul(buf, 0, &val); \ |
| if (ret < 0) \ |
| return ret; \ |
| name = val; \ |
| return count; \ |
| } \ |
| \ |
| static struct global_attr name##_attr = __ATTR(name, 0644, \ |
| show_##name, store_##name); |
| |
static ssize_t show_cpus_online(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", num_online_cpus());
}
| |
| static struct global_attr cpus_online_attr = __ATTR(cpus_online, 0444, |
| show_cpus_online, NULL); |
| |
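/* Output: one "<cpu> <load %> <runnable avg>" line per online cpu */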
static ssize_t show_cpu_load(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	unsigned int t;
	ssize_t total = 0;
	struct cpuloadmon_cpuinfo *pcpu;

	for_each_cpu(t, cpu_online_mask) {
		pcpu = &per_cpu(cpuinfo, t);
		total += scnprintf(buf + total, PAGE_SIZE - total,
				   "%u %u %u\n",
				   t, pcpu->cpu_load, pcpu->avg);
	}

	return total;
}
| |
| static struct global_attr cpu_load_attr = __ATTR(cpu_load, 0444, |
| show_cpu_load, NULL); |
| |
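/* Output: "<cpu> <runnable avg> <now us> <idle us> <iowait us>" per online cpu */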
static ssize_t show_cpu_usage(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	unsigned int t;
	ssize_t total = 0;
	struct cpuloadmon_cpuinfo *pcpu;

	for_each_cpu(t, cpu_online_mask) {
		pcpu = &per_cpu(cpuinfo, t);
		total += scnprintf(buf + total, PAGE_SIZE - total,
				   "%u %u %llu %llu %llu\n",
				   t, pcpu->avg,
				   (unsigned long long)ktime_to_us(ktime_get()),
				   (unsigned long long)get_cpu_idle_time_us(t, NULL),
				   (unsigned long long)get_cpu_iowait_time_us(t, NULL));
	}

	return total;
}
| |
| static struct global_attr cpu_usage_attr = __ATTR(cpu_usage, 0444, |
| show_cpu_usage, NULL); |
| |
| static ssize_t show_enable(struct kobject *kobj, |
| struct attribute *attr, char *buf) |
| { |
| return sprintf(buf, "%u\n", enabled); |
| } |
| |
| static ssize_t store_enable(struct kobject *kobj, |
| struct attribute *attr, const char *buf, size_t count) |
| { |
| int ret; |
| unsigned long val; |
| unsigned int before = enabled; |
| |
| ret = kstrtoul(buf, 0, &val); |
| if (ret < 0) |
| return ret; |
| enabled = !!val; /* normalize user input */ |
| if (before != enabled) |
| cpuloadmon_enable(enabled); |
| |
| return count; |
| } |
| static struct global_attr enable_attr = __ATTR(enable, 0644, |
| show_enable, store_enable); |
| |
| DECL_CPULOAD_ATTR(io_is_busy) |
| DECL_CPULOAD_ATTR(timer_rate) |
| #undef DECL_CPULOAD_ATTR |
| |
| static struct attribute *cpuload_attributes[] = { |
| &io_is_busy_attr.attr, |
| &timer_rate_attr.attr, |
| &cpus_online_attr.attr, |
| &cpu_load_attr.attr, |
| &cpu_usage_attr.attr, |
| &enable_attr.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group cpuload_attr_group = { |
| .attrs = cpuload_attributes, |
| .name = "cpuload", |
| }; |
| |
| static int cpuloadmon_idle_notifier(struct notifier_block *nb, |
| unsigned long val, |
| void *data) |
| { |
| switch (val) { |
| case IDLE_START: |
| cpuloadmon_idle_start(); |
| break; |
| case IDLE_END: |
| cpuloadmon_idle_end(); |
| break; |
| } |
| |
| return 0; |
| } |
| |
| static struct notifier_block cpuloadmon_idle_nb = { |
| .notifier_call = cpuloadmon_idle_notifier, |
| }; |
| |
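/*
 * Enable/disable monitoring on all possible cpus.  On enable, prime the
 * per-cpu idle/iowait baselines and kick each sampling timer; on
 * disable, stop the timers and zero idle_exit_time so a stale sample
 * cannot trip the idle-exit checks.
 */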
| static void cpuloadmon_enable(unsigned int state) |
| { |
| unsigned int j; |
| struct cpuloadmon_cpuinfo *pcpu; |
| const cpumask_t *cpus = cpu_possible_mask; |
| |
| if (state) { |
| u64 last_update; |
| |
| for_each_cpu(j, cpus) { |
| pcpu = &per_cpu(cpuinfo, j); |
| pcpu->time_in_idle = |
| get_cpu_idle_time_us(j, &last_update); |
| pcpu->idle_exit_time = last_update; |
| pcpu->time_in_iowait = |
| get_cpu_iowait_time(j, NULL); |
| pcpu->timer_idlecancel = 1; |
| pcpu->monitor_enabled = 1; |
| smp_wmb(); |
| |
| if (!timer_pending(&pcpu->cpu_timer)) |
| mod_timer(&pcpu->cpu_timer, jiffies + 2); |
| } |
| } else { |
| for_each_cpu(j, cpus) { |
| pcpu = &per_cpu(cpuinfo, j); |
| pcpu->monitor_enabled = 0; |
| smp_wmb(); |
| del_timer_sync(&pcpu->cpu_timer); |
| |
| /* |
| * Reset idle exit time since we may cancel the timer |
| * before it can run after the last idle exit time, |
| * to avoid tripping the check in idle exit for a timer |
| * that is trying to run. |
| */ |
| pcpu->idle_exit_time = 0; |
| } |
| } |
| |
| enabled = state; |
| } |
| |
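/*
 * The first caller creates the sysfs group and registers the idle
 * notifier; active_count makes repeated start/stop calls refcounted.
 */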
| static int cpuloadmon_start(void) |
| { |
| int rc; |
| |
| cpuloadmon_enable(1); |
| |
| /* |
| * Do not register the idle hook and create sysfs |
| * entries if we have already done so. |
| */ |
| if (atomic_inc_return(&active_count) > 1) |
| return 0; |
| |
| rc = sysfs_create_group(cpufreq_global_kobject, |
| &cpuload_attr_group); |
| if (rc) |
| return rc; |
| |
| idle_notifier_register(&cpuloadmon_idle_nb); |
| |
| return 0; |
| } |
| |
| static int cpuloadmon_stop(void) |
| { |
| cpuloadmon_enable(0); |
| |
| if (atomic_dec_return(&active_count) > 0) |
| return 0; |
| |
| idle_notifier_unregister(&cpuloadmon_idle_nb); |
| sysfs_remove_group(cpufreq_global_kobject, |
| &cpuload_attr_group); |
| |
| return 0; |
| } |
| |
| static int __init cpuload_monitor_init(void) |
| { |
| unsigned int i; |
| struct cpuloadmon_cpuinfo *pcpu; |
| |
| timer_rate = DEFAULT_TIMER_RATE; |
| |
	/* Initialize per-cpu timers */
| for_each_possible_cpu(i) { |
| pcpu = &per_cpu(cpuinfo, i); |
| init_timer(&pcpu->cpu_timer); |
| pcpu->cpu_timer.function = cpuloadmon_timer; |
| pcpu->cpu_timer.data = i; |
| } |
| |
| cpuloadmon_start(); |
| |
| /* disable by default */ |
| cpuloadmon_enable(0); |
| |
| return 0; |
| } |
| |
| module_init(cpuload_monitor_init); |
| |
| static void __exit cpuload_monitor_exit(void) |
| { |
| cpuloadmon_stop(); |
| } |
| |
| module_exit(cpuload_monitor_exit); |
| |
| MODULE_AUTHOR("Ilan Aelion <iaelion@nvidia.com>"); |
| MODULE_DESCRIPTION("'cpuload_monitor' - A cpu load monitor"); |
| MODULE_LICENSE("GPL"); |