| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| From: Connor O'Brien <connoro@google.com> |
| Date: Wed, 31 Jan 2018 18:11:57 -0800 |
| Subject: ANDROID: cpufreq: track per-task time in state |
| |
| Add time in state data to task structs, and create |
| /proc/<pid>/time_in_state files to show how long each individual task |
| has run at each frequency. |
| Create a CONFIG_CPU_FREQ_TIMES option to enable/disable this tracking. |
| |
| Bug: 72339335 |
| Bug: 127641090 |
| Test: Read /proc/<pid>/time_in_state |
| Change-Id: Ia6456754f4cb1e83b2bc35efa8fbe9f8696febc8 |
| Signed-off-by: Connor O'Brien <connoro@google.com> |
| [adelva: Folded the following changes into this patch: |
| a6d3de6a7fba ("ANDROID: Reduce use of #ifdef CONFIG_CPU_FREQ_TIMES") |
| b89ada5d9c09 ("ANDROID: Fix massive cpufreq_times memory leaks")] |
| Signed-off-by: Alistair Delva <adelva@google.com> |
| --- |
| drivers/cpufreq/Kconfig | 7 ++ |
| drivers/cpufreq/Makefile | 5 +- |
| drivers/cpufreq/cpufreq.c | 3 + |
| drivers/cpufreq/cpufreq_times.c | 207 ++++++++++++++++++++++++++++++++ |
| fs/proc/base.c | 7 ++ |
| include/linux/cpufreq_times.h | 41 +++++++ |
| include/linux/sched.h | 4 + |
| kernel/fork.c | 6 + |
| kernel/sched/cputime.c | 7 ++ |
| 9 files changed, 286 insertions(+), 1 deletion(-) |
| create mode 100644 drivers/cpufreq/cpufreq_times.c |
| create mode 100644 include/linux/cpufreq_times.h |
| |
| diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig |
| --- a/drivers/cpufreq/Kconfig |
| +++ b/drivers/cpufreq/Kconfig |
| @@ -34,6 +34,13 @@ config CPU_FREQ_STAT |
| |
| If in doubt, say N. |
| |
| +config CPU_FREQ_TIMES |
| + bool "CPU frequency time-in-state statistics" |
| + help |
| + Export per-task CPU time-in-state information through procfs. |
| + |
| + If in doubt, say N. |
| + |
| choice |
| prompt "Default CPUFreq governor" |
| default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ |
| diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile |
| --- a/drivers/cpufreq/Makefile |
| +++ b/drivers/cpufreq/Makefile |
| @@ -5,7 +5,10 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o |
| # CPUfreq stats |
| obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o |
| |
| -# CPUfreq governors |
| +# CPUfreq times |
| +obj-$(CONFIG_CPU_FREQ_TIMES) += cpufreq_times.o |
| + |
| +# CPUfreq governors |
| obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o |
| obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o |
| obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o |
| diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c |
| --- a/drivers/cpufreq/cpufreq.c |
| +++ b/drivers/cpufreq/cpufreq.c |
| @@ -16,6 +16,7 @@ |
| |
| #include <linux/cpu.h> |
| #include <linux/cpufreq.h> |
| +#include <linux/cpufreq_times.h> |
| #include <linux/cpu_cooling.h> |
| #include <linux/delay.h> |
| #include <linux/device.h> |
| @@ -387,6 +388,7 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, |
| CPUFREQ_POSTCHANGE, freqs); |
| |
| cpufreq_stats_record_transition(policy, freqs->new); |
| + cpufreq_times_record_transition(freqs); |
| policy->cur = freqs->new; |
| } |
| } |
| @@ -1466,6 +1468,7 @@ static int cpufreq_online(unsigned int cpu) |
| goto out_destroy_policy; |
| |
| cpufreq_stats_create_table(policy); |
| + cpufreq_times_create_policy(policy); |
| |
| write_lock_irqsave(&cpufreq_driver_lock, flags); |
| list_add(&policy->policy_list, &cpufreq_policy_list); |
| diff --git a/drivers/cpufreq/cpufreq_times.c b/drivers/cpufreq/cpufreq_times.c |
| new file mode 100644 |
| index 000000000000..339a3e9cf082 |
| --- /dev/null |
| +++ b/drivers/cpufreq/cpufreq_times.c |
| @@ -0,0 +1,207 @@ |
| +/* drivers/cpufreq/cpufreq_times.c |
| + * |
| + * Copyright (C) 2018 Google, Inc. |
| + * |
| + * This software is licensed under the terms of the GNU General Public |
| + * License version 2, as published by the Free Software Foundation, and |
| + * may be copied, distributed, and modified under those terms. |
| + * |
| + * This program is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + */ |
| + |
| +#include <linux/cpufreq.h> |
| +#include <linux/cpufreq_times.h> |
| +#include <linux/jiffies.h> |
| +#include <linux/sched.h> |
| +#include <linux/seq_file.h> |
| +#include <linux/slab.h> |
| +#include <linux/spinlock.h> |
| +#include <linux/threads.h> |
| + |
| +static DEFINE_SPINLOCK(task_time_in_state_lock); /* task->time_in_state */ |
| + |
| +/** |
| + * struct cpu_freqs - frequency information shared by a policy's related CPUs |
| + * @offset: start of these freqs' stats in task time_in_state array |
| + * @max_state: number of entries in freq_table |
| + * @last_index: index in freq_table of last frequency switched to |
| + * @freq_table: frequencies copied from the policy's table (max_state entries) |
| + */ |
| +struct cpu_freqs { |
| + unsigned int offset; |
| + unsigned int max_state; |
| + unsigned int last_index; |
| + unsigned int freq_table[]; |
| +}; |
| + |
| +static struct cpu_freqs *all_freqs[NR_CPUS]; |
| + |
| +static unsigned int next_offset; |
| + |
| +void cpufreq_task_times_init(struct task_struct *p) |
| +{ |
| + unsigned long flags; |
| + |
| + spin_lock_irqsave(&task_time_in_state_lock, flags); |
| + p->time_in_state = NULL; |
| + spin_unlock_irqrestore(&task_time_in_state_lock, flags); |
| + p->max_state = 0; |
| +} |
| + |
| +void cpufreq_task_times_alloc(struct task_struct *p) |
| +{ |
| + void *temp; |
| + unsigned long flags; |
| + unsigned int max_state = READ_ONCE(next_offset); |
| + |
| + /* We use one array to avoid multiple allocs per task */ |
| + temp = kcalloc(max_state, sizeof(p->time_in_state[0]), GFP_ATOMIC); |
| + if (!temp) |
| + return; |
| + |
| + spin_lock_irqsave(&task_time_in_state_lock, flags); |
| + p->time_in_state = temp; |
| + spin_unlock_irqrestore(&task_time_in_state_lock, flags); |
| + p->max_state = max_state; |
| +} |
| + |
| +/* Caller must hold task_time_in_state_lock */ |
| +static int cpufreq_task_times_realloc_locked(struct task_struct *p) |
| +{ |
| + void *temp; |
| + unsigned int max_state = READ_ONCE(next_offset); |
| + |
| + temp = krealloc(p->time_in_state, max_state * sizeof(u64), GFP_ATOMIC); |
| + if (!temp) |
| + return -ENOMEM; |
| + p->time_in_state = temp; |
| + memset(p->time_in_state + p->max_state, 0, |
| + (max_state - p->max_state) * sizeof(u64)); |
| + p->max_state = max_state; |
| + return 0; |
| +} |
| + |
| +void cpufreq_task_times_exit(struct task_struct *p) |
| +{ |
| + unsigned long flags; |
| + void *temp; |
| + |
| + spin_lock_irqsave(&task_time_in_state_lock, flags); |
| + temp = p->time_in_state; |
| + p->time_in_state = NULL; |
| + spin_unlock_irqrestore(&task_time_in_state_lock, flags); |
| + kfree(temp); |
| +} |
| + |
| +/* Print p's time at each frequency, one "cpuN" section per freq table */ |
| +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, |
| + struct pid *pid, struct task_struct *p) |
| +{ |
| + unsigned int cpu, i; |
| + u64 cputime; |
| + unsigned long flags; |
| + struct cpu_freqs *freqs, *last_freqs = NULL; |
| + |
| + spin_lock_irqsave(&task_time_in_state_lock, flags); |
| + for_each_possible_cpu(cpu) { |
| + freqs = all_freqs[cpu]; |
| + if (!freqs || freqs == last_freqs) |
| + continue; |
| + last_freqs = freqs; |
| + seq_printf(m, "cpu%u\n", cpu); |
| + for (i = 0; i < freqs->max_state; i++) { |
| + if (freqs->freq_table[i] == CPUFREQ_ENTRY_INVALID) |
| + continue; |
| + cputime = 0; |
| + if (freqs->offset + i < p->max_state && |
| + p->time_in_state) |
| + cputime = p->time_in_state[freqs->offset + i]; |
| + /* u64 ticks: %llu avoids 32-bit truncation */ |
| + seq_printf(m, "%u %llu\n", freqs->freq_table[i], |
| + (unsigned long long)nsec_to_clock_t(cputime)); |
| + } |
| + } |
| + spin_unlock_irqrestore(&task_time_in_state_lock, flags); |
| + return 0; |
| +} |
| + |
| +void cpufreq_acct_update_power(struct task_struct *p, u64 cputime) |
| +{ |
| + unsigned long flags; |
| + unsigned int state; |
| + struct cpu_freqs *freqs = all_freqs[task_cpu(p)]; |
| + |
| + if (!freqs || p->flags & PF_EXITING) |
| + return; |
| + |
| + state = freqs->offset + READ_ONCE(freqs->last_index); |
| + |
| + spin_lock_irqsave(&task_time_in_state_lock, flags); |
| + if ((state < p->max_state || !cpufreq_task_times_realloc_locked(p)) && |
| + p->time_in_state) |
| + p->time_in_state[state] += cputime; |
| + spin_unlock_irqrestore(&task_time_in_state_lock, flags); |
| +} |
| + |
| +/* Register policy's frequency table and reserve its time_in_state slots */ |
| +void cpufreq_times_create_policy(struct cpufreq_policy *policy) |
| +{ |
| + int cpu, index; |
| + unsigned int count = 0; |
| + struct cpufreq_frequency_table *pos, *table; |
| + struct cpu_freqs *freqs; |
| + |
| + if (all_freqs[policy->cpu]) |
| + return; |
| + |
| + table = policy->freq_table; |
| + if (!table) |
| + return; |
| + |
| + cpufreq_for_each_entry(pos, table) |
| + count++; |
| + |
| + /* struct_size() saturates instead of wrapping if the sum overflows */ |
| + freqs = kzalloc(struct_size(freqs, freq_table, count), GFP_KERNEL); |
| + if (!freqs) |
| + return; |
| + |
| + freqs->max_state = count; |
| + |
| + index = cpufreq_frequency_table_get_index(policy, policy->cur); |
| + if (index >= 0) |
| + WRITE_ONCE(freqs->last_index, index); |
| + |
| + cpufreq_for_each_entry(pos, table) |
| + freqs->freq_table[pos - table] = pos->frequency; |
| + |
| + /* Claim the next 'count' slots of every task's time_in_state array */ |
| + freqs->offset = next_offset; |
| + WRITE_ONCE(next_offset, freqs->offset + count); |
| + for_each_cpu(cpu, policy->related_cpus) |
| + all_freqs[cpu] = freqs; |
| +} |
| + |
| +void cpufreq_times_record_transition(struct cpufreq_freqs *freq) |
| +{ |
| + int index; |
| + struct cpu_freqs *freqs = all_freqs[freq->cpu]; |
| + struct cpufreq_policy *policy; |
| + |
| + if (!freqs) |
| + return; |
| + |
| + policy = cpufreq_cpu_get(freq->cpu); |
| + if (!policy) |
| + return; |
| + |
| + index = cpufreq_frequency_table_get_index(policy, freq->new); |
| + if (index >= 0) |
| + WRITE_ONCE(freqs->last_index, index); |
| + |
| + cpufreq_cpu_put(policy); |
| +} |
| diff --git a/fs/proc/base.c b/fs/proc/base.c |
| --- a/fs/proc/base.c |
| +++ b/fs/proc/base.c |
| @@ -96,6 +96,7 @@ |
| #include <linux/posix-timers.h> |
| #include <linux/time_namespace.h> |
| #include <linux/resctrl.h> |
| +#include <linux/cpufreq_times.h> |
| #include <trace/events/oom.h> |
| #include "internal.h" |
| #include "fd.h" |
| @@ -3222,6 +3223,9 @@ static const struct pid_entry tgid_base_stuff[] = { |
| #ifdef CONFIG_LIVEPATCH |
| ONE("patch_state", S_IRUSR, proc_pid_patch_state), |
| #endif |
| +#ifdef CONFIG_CPU_FREQ_TIMES |
| + ONE("time_in_state", 0444, proc_time_in_state_show), |
| +#endif |
| #ifdef CONFIG_STACKLEAK_METRICS |
| ONE("stack_depth", S_IRUGO, proc_stack_depth), |
| #endif |
| @@ -3559,6 +3563,9 @@ static const struct pid_entry tid_base_stuff[] = { |
| #ifdef CONFIG_PROC_PID_ARCH_STATUS |
| ONE("arch_status", S_IRUGO, proc_pid_arch_status), |
| #endif |
| +#ifdef CONFIG_CPU_FREQ_TIMES |
| + ONE("time_in_state", 0444, proc_time_in_state_show), |
| +#endif |
| }; |
| |
| static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) |
| diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h |
| new file mode 100644 |
| index 000000000000..bfef6e62c68a |
| --- /dev/null |
| +++ b/include/linux/cpufreq_times.h |
| @@ -0,0 +1,41 @@ |
| +/* include/linux/cpufreq_times.h |
| + * |
| + * Copyright (C) 2018 Google, Inc. |
| + * |
| + * This software is licensed under the terms of the GNU General Public |
| + * License version 2, as published by the Free Software Foundation, and |
| + * may be copied, distributed, and modified under those terms. |
| + * |
| + * This program is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + */ |
| + |
| +#ifndef _LINUX_CPUFREQ_TIMES_H |
| +#define _LINUX_CPUFREQ_TIMES_H |
| + |
| +#include <linux/cpufreq.h> |
| +#include <linux/pid.h> |
| + |
| +#ifdef CONFIG_CPU_FREQ_TIMES |
| +void cpufreq_task_times_init(struct task_struct *p); |
| +void cpufreq_task_times_alloc(struct task_struct *p); |
| +void cpufreq_task_times_exit(struct task_struct *p); |
| +int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, |
| + struct pid *pid, struct task_struct *p); |
| +void cpufreq_acct_update_power(struct task_struct *p, u64 cputime); |
| +void cpufreq_times_create_policy(struct cpufreq_policy *policy); |
| +void cpufreq_times_record_transition(struct cpufreq_freqs *freq); |
| +#else |
| +static inline void cpufreq_task_times_init(struct task_struct *p) {} |
| +static inline void cpufreq_task_times_alloc(struct task_struct *p) {} |
| +static inline void cpufreq_task_times_exit(struct task_struct *p) {} |
| +static inline void cpufreq_acct_update_power(struct task_struct *p, |
| + u64 cputime) {} |
| +static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {} |
| +static inline void cpufreq_times_record_transition( |
| + struct cpufreq_freqs *freq) {} |
| +#endif /* CONFIG_CPU_FREQ_TIMES */ |
| +#endif /* _LINUX_CPUFREQ_TIMES_H */ |
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -860,6 +860,10 @@ struct task_struct { |
| u64 stimescaled; |
| #endif |
| u64 gtime; |
| +#ifdef CONFIG_CPU_FREQ_TIMES |
| + u64 *time_in_state; |
| + unsigned int max_state; |
| +#endif |
| struct prev_cputime prev_cputime; |
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
| struct vtime vtime; |
| diff --git a/kernel/fork.c b/kernel/fork.c |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -456,6 +456,8 @@ void put_task_stack(struct task_struct *tsk) |
| |
| void free_task(struct task_struct *tsk) |
| { |
| + cpufreq_task_times_exit(tsk); |
| + |
| #ifndef CONFIG_THREAD_INFO_IN_TASK |
| /* |
| * The task is finally done with both the stack and thread_info, |
| @@ -1930,6 +1932,8 @@ static __latent_entropy struct task_struct *copy_process( |
| if (!p) |
| goto fork_out; |
| |
| + cpufreq_task_times_init(p); |
| + |
| /* |
| * This _must_ happen before we call free_task(), i.e. before we jump |
| * to any of the bad_fork_* labels. This is to avoid freeing |
| @@ -2438,6 +2442,8 @@ long _do_fork(struct kernel_clone_args *args) |
| if (IS_ERR(p)) |
| return PTR_ERR(p); |
| |
| + cpufreq_task_times_alloc(p); |
| + |
| /* |
| * Do this prior waking up the new thread - the thread pointer |
| * might get invalid after that point, if the thread exits quickly. |
| diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c |
| --- a/kernel/sched/cputime.c |
| +++ b/kernel/sched/cputime.c |
| @@ -2,6 +2,7 @@ |
| /* |
| * Simple CPU accounting cgroup controller |
| */ |
| +#include <linux/cpufreq_times.h> |
| #include "sched.h" |
| |
| #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
| @@ -129,6 +130,9 @@ void account_user_time(struct task_struct *p, u64 cputime) |
| |
| /* Account for user time used */ |
| acct_account_cputime(p); |
| + |
| + /* Account power usage for user time */ |
| + cpufreq_acct_update_power(p, cputime); |
| } |
| |
| /* |
| @@ -173,6 +177,9 @@ void account_system_index_time(struct task_struct *p, |
| |
| /* Account for system time used */ |
| acct_account_cputime(p); |
| + |
| + /* Account power usage for system time */ |
| + cpufreq_acct_update_power(p, cputime); |
| } |
| |
| /* |