time_in_state: change map format to improve read performance

BPF map formats must be defined at compile time, and the uid_times_map
must be able to accommodate the full range of frequencies available on
a device, but information about the number of available frequencies is
not available at compile time.

The current map format addresses this by using a separate map entry
for every uid-frequency combination that has occurred, but this
results in significant overhead since fetching stats for a single UID
can require dozens of bpf() syscalls.

Instead, define a map where each value holds times for 32
frequencies. This preserves flexibility (devices with more than 32
frequencies per cluster will simply have 2 or more key-value pairs per
UID) while drastically reducing the number of lookups required to
fetch all data for a UID.

Test: libtimeinstate_test passes
Bug: 138317993
Change-Id: I4b4f74ae52bad820f1b9b63212e2d702af97a49f
Signed-off-by: Connor O'Brien <connoro@google.com>
diff --git a/time_in_state.c b/time_in_state.c
index b91de5e..b0e7acb 100644
--- a/time_in_state.c
+++ b/time_in_state.c
@@ -16,16 +16,31 @@
 
 #include <bpf_helpers.h>
 
+#define FREQS_PER_ENTRY 32
+
 typedef struct {
     uint32_t uid;
-    uint32_t freq;
+    uint32_t bucket;
 } time_key;
 
-DEFINE_BPF_MAP(uid_times_map, PERCPU_HASH, time_key, uint64_t, 10240)
+typedef struct {
+    uint64_t ar[FREQS_PER_ENTRY];
+} time_val;
+
+DEFINE_BPF_MAP(uid_times_map, PERCPU_HASH, time_key, time_val, 1024)
 DEFINE_BPF_MAP(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1)
 
 DEFINE_BPF_MAP(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024)
-DEFINE_BPF_MAP(policy_freq_map, ARRAY, uint32_t, uint32_t, 1024)
+DEFINE_BPF_MAP(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024)
+
+typedef struct {
+	uint32_t policy;
+	uint32_t freq;
+} freq_idx_key;
+
+DEFINE_BPF_MAP(freq_to_idx_map, HASH, freq_idx_key, uint8_t, 2048)
+
+DEFINE_BPF_MAP(zero_map, ARRAY, uint32_t, time_val, 1)
 
 struct switch_args {
     unsigned long long ignore;
@@ -53,17 +68,23 @@
     uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
     if (!policyp) return 0;
     uint32_t policy = *policyp;
-    uint32_t* freq = bpf_policy_freq_map_lookup_elem(&policy);
-    if (!freq || !*freq) return 0;
+    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
+    if (!freq_idxp || !*freq_idxp) return 0;
+    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
+    uint8_t freq_idx = *freq_idxp - 1;
 
     uint32_t uid = bpf_get_current_uid_gid();
-    time_key key = {.uid = uid, .freq = *freq};
-    uint64_t* tot_time = bpf_uid_times_map_lookup_elem(&key);
+    time_key key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
+    time_val* val = bpf_uid_times_map_lookup_elem(&key);
+    if (!val) {
+        time_val* zero_valp = bpf_zero_map_lookup_elem(&zero);
+        if (!zero_valp) return 0;
+        time_val zero_val = *zero_valp;
+        bpf_uid_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
+        val = bpf_uid_times_map_lookup_elem(&key);
+    }
     uint64_t delta = time - old_last;
-    if (!tot_time)
-        bpf_uid_times_map_update_elem(&key, &delta, BPF_ANY);
-    else
-        *tot_time += delta;
+    if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
     return 0;
 }
 
@@ -80,7 +101,11 @@
     uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
     if (!policyp) return 0;
     uint32_t policy = *policyp;
-    bpf_policy_freq_map_update_elem(&policy, &new, BPF_ANY);
+    freq_idx_key key = {.policy = policy, .freq = new};
+    uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
+    if (!idxp) return 0;
+    uint8_t idx = *idxp;
+    bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
     return 0;
 }