Emit swap size in the killed process' statsd atoms
Changes:
- We are already reading /proc/pid/status to resolve the tgid. While we
are at it, also parse RSS and swap values.
- Use the RSS and swap values for non-memcg builds when creating the
statsd outputs
- Given we already read RSS, remove the separate read of /proc/pid/statm
that was used to get tasksize.
Bug: 163116785
Test: manual, out/host/linux-x86/bin/statsd_testdrive 51
Change-Id: I9d98b9ffe8be0b014bb09174ec9532382cae1f38
diff --git a/lmkd.cpp b/lmkd.cpp
index ddf0e6a..1daf198 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -83,6 +83,8 @@
#define MEMINFO_PATH "/proc/meminfo"
#define VMSTAT_PATH "/proc/vmstat"
#define PROC_STATUS_TGID_FIELD "Tgid:"
+#define PROC_STATUS_RSS_FIELD "VmRSS:"
+#define PROC_STATUS_SWAP_FIELD "VmSwap:"
#define LINE_MAX 128
#define PERCEPTIBLE_APP_ADJ 200
@@ -798,7 +800,7 @@
mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE;
stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj,
- min_score_adj, 0, &mem_st);
+ min_score_adj, &mem_st);
}
free(taskname);
@@ -930,30 +932,33 @@
(to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
}
-static int proc_get_tgid(int pid) {
+/* Reads /proc/pid/status into buf. */
+static bool read_proc_status(int pid, char *buf, size_t buf_sz) {
char path[PATH_MAX];
- char buf[PAGE_SIZE];
int fd;
ssize_t size;
- char *pos;
- int64_t tgid = -1;
snprintf(path, PATH_MAX, "/proc/%d/status", pid);
fd = open(path, O_RDONLY | O_CLOEXEC);
if (fd < 0) {
- return -1;
+ return false;
}
- size = read_all(fd, buf, sizeof(buf) - 1);
+ size = read_all(fd, buf, buf_sz - 1);
+ close(fd);
if (size < 0) {
- goto out;
+ return false;
}
buf[size] = 0;
+ return true;
+}
- pos = buf;
+/* Looks for tag in buf and parses the first integer */
+static bool parse_status_tag(char *buf, const char *tag, int64_t *out) {
+ char *pos = buf;
while (true) {
- pos = strstr(pos, PROC_STATUS_TGID_FIELD);
- /* Stop if TGID tag not found or found at the line beginning */
+ pos = strstr(pos, tag);
+ /* Stop if tag not found or found at the line beginning */
if (pos == NULL || pos == buf || pos[-1] == '\n') {
break;
}
@@ -961,16 +966,12 @@
}
if (pos == NULL) {
- goto out;
+ return false;
}
- pos += strlen(PROC_STATUS_TGID_FIELD);
- while (*pos == ' ') pos++;
- parse_int64(pos, &tgid);
-
-out:
- close(fd);
- return (int)tgid;
+ pos += strlen(tag);
+ while (*pos == ' ') ++pos;
+ return parse_int64(pos, out);
}
static int proc_get_size(int pid) {
@@ -1034,7 +1035,8 @@
struct lmk_procprio params;
bool is_system_server;
struct passwd *pwdrec;
- int tgid;
+ int64_t tgid;
+ char buf[PAGE_SIZE];
lmkd_pack_get_procprio(packet, field_count, &params);
@@ -1050,11 +1052,12 @@
}
/* Check if registered process is a thread group leader */
- tgid = proc_get_tgid(params.pid);
- if (tgid >= 0 && tgid != params.pid) {
- ALOGE("Attempt to register a task that is not a thread group leader (tid %d, tgid %d)",
- params.pid, tgid);
- return;
+ if (read_proc_status(params.pid, buf, sizeof(buf))) {
+ if (parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid) && tgid != params.pid) {
+ ALOGE("Attempt to register a task that is not a thread group leader "
+ "(tid %d, tgid %" PRId64 ")", params.pid, tgid);
+ return;
+ }
}
/* gid containing AID_READPROC required */
@@ -1854,7 +1857,7 @@
}
}
-static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize,
+static void killinfo_log(struct proc* procp, int min_oom_score, int rss_kb,
int kill_reason, union meminfo *mi,
struct wakeup_info *wi, struct timespec *tm) {
/* log process information */
@@ -1862,7 +1865,7 @@
android_log_write_int32(ctx, procp->uid);
android_log_write_int32(ctx, procp->oomadj);
android_log_write_int32(ctx, min_oom_score);
- android_log_write_int32(ctx, (int32_t)min(tasksize * page_k, INT32_MAX));
+ android_log_write_int32(ctx, (int32_t)min(rss_kb, INT32_MAX));
android_log_write_int32(ctx, kill_reason);
/* log meminfo fields */
@@ -2040,38 +2043,48 @@
maxevents++;
}
-/* Kill one process specified by procp. Returns the size of the process killed */
+/* Kill one process specified by procp. Returns the size (in pages) of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
struct timespec *tm) {
int pid = procp->pid;
int pidfd = procp->pidfd;
uid_t uid = procp->uid;
- int tgid;
char *taskname;
- int tasksize;
int r;
int result = -1;
struct memory_stat *mem_st;
- char buf[LINE_MAX];
+ int64_t tgid;
+ int64_t rss_kb;
+ int64_t swap_kb;
+ char buf[PAGE_SIZE];
- tgid = proc_get_tgid(pid);
- if (tgid >= 0 && tgid != pid) {
- ALOGE("Possible pid reuse detected (pid %d, tgid %d)!", pid, tgid);
+ if (!read_proc_status(pid, buf, sizeof(buf))) {
+ goto out;
+ }
+ if (!parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid)) {
+ ALOGE("Unable to parse tgid from /proc/%d/status", pid);
+ goto out;
+ }
+ if (tgid != pid) {
+ ALOGE("Possible pid reuse detected (pid %d, tgid %" PRId64 ")!", pid, tgid);
+ goto out;
+ }
+ // Zombie processes will not have RSS / Swap fields.
+ if (!parse_status_tag(buf, PROC_STATUS_RSS_FIELD, &rss_kb)) {
+ goto out;
+ }
+ if (!parse_status_tag(buf, PROC_STATUS_SWAP_FIELD, &swap_kb)) {
goto out;
}
taskname = proc_get_name(pid, buf, sizeof(buf));
+ // taskname will point inside buf, do not reuse buf onwards.
if (!taskname) {
goto out;
}
- tasksize = proc_get_size(pid);
- if (tasksize <= 0) {
- goto out;
- }
-
- mem_st = stats_read_memory_stat(per_app_memcg, pid, uid);
+ mem_st = stats_read_memory_stat(per_app_memcg, pid, uid, rss_kb * 1024, swap_kb * 1024);
TRACE_KILL_START(pid);
@@ -2099,21 +2112,21 @@
inc_killcnt(procp->oomadj);
- killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi, wi, tm);
+ killinfo_log(procp, min_oom_score, rss_kb, kill_reason, mi, wi, tm);
if (kill_desc) {
- ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid,
- uid, procp->oomadj, tasksize * page_k, kill_desc);
+ ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %" PRId64 "kB; reason: %s", taskname, pid,
+ uid, procp->oomadj, rss_kb, kill_desc);
} else {
- ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid,
- uid, procp->oomadj, tasksize * page_k);
+ ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %" PRId64 "kB", taskname, pid,
+ uid, procp->oomadj, rss_kb);
}
- stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, tasksize, mem_st);
+ stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, mem_st);
ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid);
- result = tasksize;
+ result = rss_kb / page_k;
out:
/*
diff --git a/statslog.cpp b/statslog.cpp
index 8205a55..8b42d71 100644
--- a/statslog.cpp
+++ b/statslog.cpp
@@ -77,7 +77,7 @@
*/
int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
- int32_t oom_score, int32_t min_oom_score, int tasksize,
+ int32_t oom_score, int32_t min_oom_score,
struct memory_stat *mem_st) {
if (enable_stats_log) {
return android::lmkd::stats::stats_write(
@@ -87,7 +87,7 @@
oom_score,
mem_st ? mem_st->pgfault : -1,
mem_st ? mem_st->pgmajfault : -1,
- mem_st ? mem_st->rss_in_bytes : tasksize * BYTES_IN_KILOBYTE,
+ mem_st ? mem_st->rss_in_bytes : -1,
mem_st ? mem_st->cache_in_bytes : -1,
mem_st ? mem_st->swap_in_bytes : -1,
mem_st ? mem_st->process_start_time_ns : -1,
@@ -99,13 +99,12 @@
}
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
- int32_t min_oom_score, int tasksize,
+ int32_t min_oom_score,
struct memory_stat* mem_st) {
struct proc* proc = pid_lookup(pid);
if (!proc) return -EINVAL;
- return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score,
- tasksize, mem_st);
+ return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score, mem_st);
}
static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {
@@ -170,26 +169,24 @@
// field 10 is pgfault
// field 12 is pgmajfault
// field 22 is starttime
- // field 24 is rss_in_pages
- int64_t pgfault = 0, pgmajfault = 0, starttime = 0, rss_in_pages = 0;
+ int64_t pgfault = 0, pgmajfault = 0, starttime = 0;
if (sscanf(buffer,
"%*u %*s %*s %*d %*d %*d %*d %*d %*d %" SCNd64 " %*d "
"%" SCNd64 " %*d %*u %*u %*d %*d %*d %*d %*d %*d "
- "%" SCNd64 " %*d %" SCNd64 "",
- &pgfault, &pgmajfault, &starttime, &rss_in_pages) != 4) {
+ "%" SCNd64 "",
+ &pgfault, &pgmajfault, &starttime) != 3) {
return -1;
}
mem_st->pgfault = pgfault;
mem_st->pgmajfault = pgmajfault;
- mem_st->rss_in_bytes = (rss_in_pages * PAGE_SIZE);
mem_st->process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
return 0;
}
-struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid) {
+struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid,
+ int64_t rss_bytes, int64_t swap_bytes) {
static struct memory_stat mem_st = {};
-
if (!enable_stats_log) {
return NULL;
}
@@ -200,6 +197,8 @@
}
} else {
if (memory_stat_from_procfs(&mem_st, pid) == 0) {
+ mem_st.rss_in_bytes = rss_bytes;
+ mem_st.swap_in_bytes = swap_bytes;
return &mem_st;
}
}
diff --git a/statslog.h b/statslog.h
index 5992a49..56d4a88 100644
--- a/statslog.h
+++ b/statslog.h
@@ -59,17 +59,21 @@
int
stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
int32_t oom_score, int32_t min_oom_score,
- int tasksize, struct memory_stat *mem_st);
+ struct memory_stat *mem_st);
/**
* Logs the event when LMKD kills a process to reduce memory pressure.
* Code: LMK_KILL_OCCURRED = 51
*/
int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
- int32_t min_oom_score, int tasksize,
+ int32_t min_oom_score,
struct memory_stat* mem_st);
-struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid);
+/**
+ * Reads memory stats used to log the statsd atom. Returns non-null ptr on success.
+ */
+struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid,
+ int64_t rss_bytes, int64_t swap_bytes);
/**
* Registers a process taskname by pid, while it is still alive.
@@ -94,19 +98,21 @@
static inline int
stats_write_lmk_kill_occurred(int32_t uid __unused,
char const* process_name __unused, int32_t oom_score __unused,
- int32_t min_oom_score __unused, int tasksize __unused,
+ int32_t min_oom_score __unused,
struct memory_stat *mem_st __unused) { return -EINVAL; }
static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused,
int pid __unused, int32_t oom_score __unused,
int32_t min_oom_score __unused,
- int tasksize __unused,
struct memory_stat* mem_st __unused) {
return -EINVAL;
}
static inline struct memory_stat *stats_read_memory_stat(bool per_app_memcg __unused,
- int pid __unused, uid_t uid __unused) { return NULL; }
+ int pid __unused, uid_t uid __unused,
+ int64_t rss_bytes __unused, int64_t swap_bytes __unused) {
+ return NULL;
+}
static inline void stats_store_taskname(int pid __unused, const char* taskname __unused) {}