Emit swap size in the killed process' statsd atoms

Changes:
- We are already reading /proc/pid/status to resolve the tgid. While we
are at it, also parse RSS and swap values.
- Use the RSS and swap values for non memcg builds when creating the
statsd outputs
- Given we already read RSS, remove the separate read of /proc/pid/statm
that used to get tasksize.

Bug: 163116785
Test: manual, out/host/linux-x86/bin/statsd_testdrive 51
Change-Id: I9d98b9ffe8be0b014bb09174ec9532382cae1f38
diff --git a/lmkd.cpp b/lmkd.cpp
index ddf0e6a..1daf198 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -83,6 +83,8 @@
 #define MEMINFO_PATH "/proc/meminfo"
 #define VMSTAT_PATH "/proc/vmstat"
 #define PROC_STATUS_TGID_FIELD "Tgid:"
+#define PROC_STATUS_RSS_FIELD "VmRSS:"
+#define PROC_STATUS_SWAP_FIELD "VmSwap:"
 #define LINE_MAX 128
 
 #define PERCEPTIBLE_APP_ADJ 200
@@ -798,7 +800,7 @@
             mem_st.process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
             mem_st.rss_in_bytes = rss_in_pages * PAGE_SIZE;
             stats_write_lmk_kill_occurred_pid(uid, pid, oom_score_adj,
-                                              min_score_adj, 0, &mem_st);
+                                              min_score_adj, &mem_st);
         }
 
         free(taskname);
@@ -930,30 +932,33 @@
            (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
 }
 
-static int proc_get_tgid(int pid) {
+/* Reads /proc/pid/status into buf. */
+static bool read_proc_status(int pid, char *buf, size_t buf_sz) {
     char path[PATH_MAX];
-    char buf[PAGE_SIZE];
     int fd;
     ssize_t size;
-    char *pos;
-    int64_t tgid = -1;
 
     snprintf(path, PATH_MAX, "/proc/%d/status", pid);
     fd = open(path, O_RDONLY | O_CLOEXEC);
     if (fd < 0) {
-        return -1;
+        return false;
     }
 
-    size = read_all(fd, buf, sizeof(buf) - 1);
+    size = read_all(fd, buf, buf_sz - 1);
+    close(fd);
     if (size < 0) {
-        goto out;
+        return false;
     }
     buf[size] = 0;
+    return true;
+}
 
-    pos = buf;
+/* Looks for tag in buf and parses the first integer */
+static bool parse_status_tag(char *buf, const char *tag, int64_t *out) {
+    char *pos = buf;
     while (true) {
-        pos = strstr(pos, PROC_STATUS_TGID_FIELD);
-        /* Stop if TGID tag not found or found at the line beginning */
+        pos = strstr(pos, tag);
+        /* Stop if tag not found or found at the line beginning */
         if (pos == NULL || pos == buf || pos[-1] == '\n') {
             break;
         }
@@ -961,16 +966,12 @@
     }
 
     if (pos == NULL) {
-        goto out;
+        return false;
     }
 
-    pos += strlen(PROC_STATUS_TGID_FIELD);
-    while (*pos == ' ') pos++;
-    parse_int64(pos, &tgid);
-
-out:
-    close(fd);
-    return (int)tgid;
+    pos += strlen(tag);
+    while (*pos == ' ') ++pos;
+    return parse_int64(pos, out);
 }
 
 static int proc_get_size(int pid) {
@@ -1034,7 +1035,8 @@
     struct lmk_procprio params;
     bool is_system_server;
     struct passwd *pwdrec;
-    int tgid;
+    int64_t tgid;
+    char buf[PAGE_SIZE];
 
     lmkd_pack_get_procprio(packet, field_count, &params);
 
@@ -1050,11 +1052,12 @@
     }
 
     /* Check if registered process is a thread group leader */
-    tgid = proc_get_tgid(params.pid);
-    if (tgid >= 0 && tgid != params.pid) {
-        ALOGE("Attempt to register a task that is not a thread group leader (tid %d, tgid %d)",
-            params.pid, tgid);
-        return;
+    if (read_proc_status(params.pid, buf, sizeof(buf))) {
+        if (parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid) && tgid != params.pid) {
+            ALOGE("Attempt to register a task that is not a thread group leader "
+                  "(tid %d, tgid %" PRId64 ")", params.pid, tgid);
+            return;
+        }
     }
 
     /* gid containing AID_READPROC required */
@@ -1854,7 +1857,7 @@
     }
 }
 
-static void killinfo_log(struct proc* procp, int min_oom_score, int tasksize,
+static void killinfo_log(struct proc* procp, int min_oom_score, int rss_kb,
                          int kill_reason, union meminfo *mi,
                          struct wakeup_info *wi, struct timespec *tm) {
     /* log process information */
@@ -1862,7 +1865,7 @@
     android_log_write_int32(ctx, procp->uid);
     android_log_write_int32(ctx, procp->oomadj);
     android_log_write_int32(ctx, min_oom_score);
-    android_log_write_int32(ctx, (int32_t)min(tasksize * page_k, INT32_MAX));
+    android_log_write_int32(ctx, (int32_t)min(rss_kb, INT32_MAX));
     android_log_write_int32(ctx, kill_reason);
 
     /* log meminfo fields */
@@ -2040,38 +2043,48 @@
     maxevents++;
 }
 
-/* Kill one process specified by procp.  Returns the size of the process killed */
+/* Kill one process specified by procp.  Returns the size (in pages) of the process killed */
 static int kill_one_process(struct proc* procp, int min_oom_score, int kill_reason,
                             const char *kill_desc, union meminfo *mi, struct wakeup_info *wi,
                             struct timespec *tm) {
     int pid = procp->pid;
     int pidfd = procp->pidfd;
     uid_t uid = procp->uid;
-    int tgid;
     char *taskname;
-    int tasksize;
     int r;
     int result = -1;
     struct memory_stat *mem_st;
-    char buf[LINE_MAX];
+    int64_t tgid;
+    int64_t rss_kb;
+    int64_t swap_kb;
+    char buf[PAGE_SIZE];
 
-    tgid = proc_get_tgid(pid);
-    if (tgid >= 0 && tgid != pid) {
-        ALOGE("Possible pid reuse detected (pid %d, tgid %d)!", pid, tgid);
+    if (!read_proc_status(pid, buf, sizeof(buf))) {
+        goto out;
+    }
+    if (!parse_status_tag(buf, PROC_STATUS_TGID_FIELD, &tgid)) {
+        ALOGE("Unable to parse tgid from /proc/%d/status", pid);
+        goto out;
+    }
+    if (tgid != pid) {
+        ALOGE("Possible pid reuse detected (pid %d, tgid %" PRId64 ")!", pid, tgid);
+        goto out;
+    }
+    // Zombie processes will not have RSS / Swap fields.
+    if (!parse_status_tag(buf, PROC_STATUS_RSS_FIELD, &rss_kb)) {
+        goto out;
+    }
+    if (!parse_status_tag(buf, PROC_STATUS_SWAP_FIELD, &swap_kb)) {
         goto out;
     }
 
     taskname = proc_get_name(pid, buf, sizeof(buf));
+    // taskname will point inside buf, do not reuse buf onwards.
     if (!taskname) {
         goto out;
     }
 
-    tasksize = proc_get_size(pid);
-    if (tasksize <= 0) {
-        goto out;
-    }
-
-    mem_st = stats_read_memory_stat(per_app_memcg, pid, uid);
+    mem_st = stats_read_memory_stat(per_app_memcg, pid, uid, rss_kb * 1024, swap_kb * 1024);
 
     TRACE_KILL_START(pid);
 
@@ -2099,21 +2112,21 @@
 
     inc_killcnt(procp->oomadj);
 
-    killinfo_log(procp, min_oom_score, tasksize, kill_reason, mi, wi, tm);
+    killinfo_log(procp, min_oom_score, rss_kb, kill_reason, mi, wi, tm);
 
     if (kill_desc) {
-        ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid,
-              uid, procp->oomadj, tasksize * page_k, kill_desc);
+        ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %" PRId64 "kB; reason: %s", taskname, pid,
+              uid, procp->oomadj, rss_kb, kill_desc);
     } else {
-        ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid,
-              uid, procp->oomadj, tasksize * page_k);
+        ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %" PRId64 "kB", taskname, pid,
+              uid, procp->oomadj, rss_kb);
     }
 
-    stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, tasksize, mem_st);
+    stats_write_lmk_kill_occurred(uid, taskname, procp->oomadj, min_oom_score, mem_st);
 
     ctrl_data_write_lmk_kill_occurred((pid_t)pid, uid);
 
-    result = tasksize;
+    result = rss_kb / page_k;
 
 out:
     /*
diff --git a/statslog.cpp b/statslog.cpp
index 8205a55..8b42d71 100644
--- a/statslog.cpp
+++ b/statslog.cpp
@@ -77,7 +77,7 @@
  */
 int
 stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
-                              int32_t oom_score, int32_t min_oom_score, int tasksize,
+                              int32_t oom_score, int32_t min_oom_score,
                               struct memory_stat *mem_st) {
     if (enable_stats_log) {
         return android::lmkd::stats::stats_write(
@@ -87,7 +87,7 @@
                 oom_score,
                 mem_st ? mem_st->pgfault : -1,
                 mem_st ? mem_st->pgmajfault : -1,
-                mem_st ? mem_st->rss_in_bytes : tasksize * BYTES_IN_KILOBYTE,
+                mem_st ? mem_st->rss_in_bytes : -1,
                 mem_st ? mem_st->cache_in_bytes : -1,
                 mem_st ? mem_st->swap_in_bytes : -1,
                 mem_st ? mem_st->process_start_time_ns : -1,
@@ -99,13 +99,12 @@
 }
 
 int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
-                                      int32_t min_oom_score, int tasksize,
+                                      int32_t min_oom_score,
                                       struct memory_stat* mem_st) {
     struct proc* proc = pid_lookup(pid);
     if (!proc) return -EINVAL;
 
-    return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score,
-                                         tasksize, mem_st);
+    return stats_write_lmk_kill_occurred(uid, proc->taskname, oom_score, min_oom_score, mem_st);
 }
 
 static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {
@@ -170,26 +169,24 @@
     // field 10 is pgfault
     // field 12 is pgmajfault
     // field 22 is starttime
-    // field 24 is rss_in_pages
-    int64_t pgfault = 0, pgmajfault = 0, starttime = 0, rss_in_pages = 0;
+    int64_t pgfault = 0, pgmajfault = 0, starttime = 0;
     if (sscanf(buffer,
                "%*u %*s %*s %*d %*d %*d %*d %*d %*d %" SCNd64 " %*d "
                "%" SCNd64 " %*d %*u %*u %*d %*d %*d %*d %*d %*d "
-               "%" SCNd64 " %*d %" SCNd64 "",
-               &pgfault, &pgmajfault, &starttime, &rss_in_pages) != 4) {
+               "%" SCNd64 "",
+               &pgfault, &pgmajfault, &starttime) != 3) {
         return -1;
     }
     mem_st->pgfault = pgfault;
     mem_st->pgmajfault = pgmajfault;
-    mem_st->rss_in_bytes = (rss_in_pages * PAGE_SIZE);
     mem_st->process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
 
     return 0;
 }
 
-struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid) {
+struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid,
+                                           int64_t rss_bytes, int64_t swap_bytes) {
     static struct memory_stat mem_st = {};
-
     if (!enable_stats_log) {
         return NULL;
     }
@@ -200,6 +197,8 @@
         }
     } else {
         if (memory_stat_from_procfs(&mem_st, pid) == 0) {
+            mem_st.rss_in_bytes = rss_bytes;
+            mem_st.swap_in_bytes = swap_bytes;
             return &mem_st;
         }
     }
diff --git a/statslog.h b/statslog.h
index 5992a49..56d4a88 100644
--- a/statslog.h
+++ b/statslog.h
@@ -59,17 +59,21 @@
 int
 stats_write_lmk_kill_occurred(int32_t uid, char const* process_name,
                               int32_t oom_score, int32_t min_oom_score,
-                              int tasksize, struct memory_stat *mem_st);
+                              struct memory_stat *mem_st);
 
 /**
  * Logs the event when LMKD kills a process to reduce memory pressure.
  * Code: LMK_KILL_OCCURRED = 51
  */
 int stats_write_lmk_kill_occurred_pid(int32_t uid, int pid, int32_t oom_score,
-                                      int32_t min_oom_score, int tasksize,
+                                      int32_t min_oom_score,
                                       struct memory_stat* mem_st);
 
-struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid);
+/**
+ * Reads memory stats used to log the statsd atom. Returns non-null ptr on success.
+ */
+struct memory_stat *stats_read_memory_stat(bool per_app_memcg, int pid, uid_t uid,
+                                           int64_t rss_bytes, int64_t swap_bytes);
 
 /**
  * Registers a process taskname by pid, while it is still alive.
@@ -94,19 +98,21 @@
 static inline int
 stats_write_lmk_kill_occurred(int32_t uid __unused,
                               char const* process_name __unused, int32_t oom_score __unused,
-                              int32_t min_oom_score __unused, int tasksize __unused,
+                              int32_t min_oom_score __unused,
                               struct memory_stat *mem_st __unused) { return -EINVAL; }
 
 static inline int stats_write_lmk_kill_occurred_pid(int32_t uid __unused,
                                                     int pid __unused, int32_t oom_score __unused,
                                                     int32_t min_oom_score __unused,
-                                                    int tasksize __unused,
                                                     struct memory_stat* mem_st __unused) {
     return -EINVAL;
 }
 
 static inline struct memory_stat *stats_read_memory_stat(bool per_app_memcg __unused,
-                                    int pid __unused, uid_t uid __unused) { return NULL; }
+                                    int pid __unused, uid_t uid __unused,
+                                    int64_t rss_bytes __unused, int64_t swap_bytes __unused) {
+    return NULL;
+}
 
 static inline void stats_store_taskname(int pid __unused, const char* taskname __unused) {}