lmkd: Add current and max thrashing levels in LMK_MEMORY_STATS reports
Thrashing threshold tuning requires collecting thrashing level data from
the field and correlating these levels with other indications that the
device is non-responsive.
Include current and max thrashing levels in the lmkd kill reports. Max
thrashing level captures the highest level seen since the last kill report.
Bug: 194433891
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752
Merged-In: I8a34dc41e7f03668bfad4ac2cbcb5d2570a10752
diff --git a/lmkd.cpp b/lmkd.cpp
index f3c301e..2f0df91 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -2110,10 +2110,16 @@
maxevents++;
}
+struct kill_info { /* Kill details handed to kill_one_process(); a NULL pointer means "no reason info" */
+ enum kill_reasons kill_reason; /* reason code stored in kill_st and logged */
+ const char *kill_desc; /* reason text appended to the kill log message */
+ int thrashing; /* thrashing level when the kill was requested */
+ int max_thrashing; /* highest thrashing level since the last kill that freed pages */
+};
+
/* Kill one process specified by procp. Returns the size (in pages) of the process killed */
-static int kill_one_process(struct proc* procp, int min_oom_score, enum kill_reasons kill_reason,
- const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
- struct timespec *tm) {
+static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_info *ki,
+ union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
int pid = procp->pid;
int pidfd = procp->pidfd;
uid_t uid = procp->uid;
@@ -2180,19 +2186,25 @@
inc_killcnt(procp->oomadj);
- killinfo_log(procp, min_oom_score, rss_kb, swap_kb, kill_reason, mi, wi, tm);
-
- if (kill_desc) {
+ if (ki) {
+ kill_st.kill_reason = ki->kill_reason;
+ kill_st.thrashing = ki->thrashing;
+ kill_st.max_thrashing = ki->max_thrashing;
+ killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki->kill_reason, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
- "kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb, kill_desc);
+ "kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb,
+ ki->kill_desc);
} else {
+ kill_st.kill_reason = NONE;
+ kill_st.thrashing = 0;
+ kill_st.max_thrashing = 0;
+ killinfo_log(procp, min_oom_score, rss_kb, swap_kb, NONE, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb);
}
kill_st.uid = static_cast<int32_t>(uid);
kill_st.taskname = taskname;
- kill_st.kill_reason = kill_reason;
kill_st.oom_score = procp->oomadj;
kill_st.min_oom_score = min_oom_score;
kill_st.free_mem_kb = mi->field.nr_free_pages * page_k;
@@ -2216,8 +2228,7 @@
* Find one process to kill at or above the given oom_score_adj level.
* Returns size of the killed process.
*/
-static int find_and_kill_process(int min_score_adj, enum kill_reasons kill_reason,
- const char *kill_desc, union meminfo *mi,
+static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) {
int i;
int killed_size = 0;
@@ -2242,8 +2253,7 @@
if (!procp)
break;
- killed_size = kill_one_process(procp, min_score_adj, kill_reason, kill_desc,
- mi, wi, tm);
+ killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm);
if (killed_size >= 0) {
if (!lmk_state_change_start) {
lmk_state_change_start = true;
@@ -2399,6 +2409,7 @@
static struct timespec thrashing_reset_tm;
static int64_t prev_thrash_growth = 0;
static bool check_filecache = false;
+ static int max_thrashing = 0;
union meminfo mi;
union vmstat vs;
@@ -2524,6 +2535,9 @@
}
/* Add previous cycle's decayed thrashing amount */
thrashing += prev_thrash_growth;
+ if (max_thrashing < thrashing) {
+ max_thrashing = thrashing;
+ }
/*
* Refresh watermarks once per min in case user updated one of the margins.
@@ -2636,10 +2650,16 @@
/* Kill a process if necessary */
if (kill_reason != NONE) {
- int pages_freed = find_and_kill_process(min_score_adj, kill_reason, kill_desc, &mi,
- &wi, &curr_tm);
+ struct kill_info ki = {
+ .kill_reason = kill_reason,
+ .kill_desc = kill_desc,
+ .thrashing = (int)thrashing,
+ .max_thrashing = max_thrashing,
+ };
+ int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm);
if (pages_freed > 0) {
killing = true;
+ max_thrashing = 0;
if (cut_thrashing_limit) {
/*
* Cut thrasing limit by thrashing_limit_decay_pct percentage of the current
@@ -2856,7 +2876,7 @@
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
- if (find_and_kill_process(level_oomadj[level], NONE, NULL, &mi, &wi, &curr_tm) == 0) {
+ if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@@ -2879,7 +2899,7 @@
min_score_adj = level_oomadj[level];
}
- pages_freed = find_and_kill_process(min_score_adj, NONE, NULL, &mi, &wi, &curr_tm);
+ pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */
diff --git a/statslog.cpp b/statslog.cpp
index ba39f54..6568f73 100644
--- a/statslog.cpp
+++ b/statslog.cpp
@@ -323,6 +323,8 @@
index = pack_int32(packet, index, (int)kill_stat->free_mem_kb);
index = pack_int32(packet, index, (int)kill_stat->free_swap_kb);
index = pack_int32(packet, index, (int)kill_stat->kill_reason);
+ index = pack_int32(packet, index, kill_stat->thrashing);
+ index = pack_int32(packet, index, kill_stat->max_thrashing);
index = pack_string(packet, index, kill_stat->taskname);
return index;
diff --git a/statslog.h b/statslog.h
index 44af35f..89e4d2e 100644
--- a/statslog.h
+++ b/statslog.h
@@ -35,13 +35,13 @@
* Max LMKD reply packet length in bytes
* Notes about size calculation:
* 4 bytes for packet type
- * 80 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
+ * 88 bytes for the LmkKillOccurred fields: memory_stat + kill_stat
* 2 bytes for process name string size
* MAX_TASKNAME_LEN bytes for the process name string
*
* Must be in sync with LmkdConnection.java
*/
-#define LMKD_REPLY_MAX_SIZE 214
+#define LMKD_REPLY_MAX_SIZE 222
/* LMK_MEMORY_STATS packet payload */
struct memory_stat {
@@ -76,6 +76,8 @@
int32_t min_oom_score;
int64_t free_mem_kb;
int64_t free_swap_kb;
+ int32_t thrashing;
+ int32_t max_thrashing;
};
/* LMKD reply packet to hold data for the LmkKillOccurred statsd atom */