Snap for 8564071 from b05a63e6104fcaf221813ccf4740b059b7982e69 to mainline-tethering-release

Change-Id: I36fc31ded4324cc659a8c1ae28bd96a63fa7851d
diff --git a/Android.bp b/Android.bp
index 4f53ded..0481c96 100644
--- a/Android.bp
+++ b/Android.bp
@@ -12,7 +12,11 @@
 cc_binary {
     name: "lmkd",
 
-    srcs: ["lmkd.cpp"],
+    srcs: [
+        "lmkd.cpp",
+        "reaper.cpp",
+        "watchdog.cpp",
+    ],
     shared_libs: [
         "libcutils",
         "liblog",
@@ -36,6 +40,7 @@
     init_rc: ["lmkd.rc"],
     defaults: ["stats_defaults"],
     logtags: ["event.logtags"],
+    afdo: true,
 }
 
 cc_library_static {
@@ -50,6 +55,7 @@
     defaults: ["stats_defaults"],
     shared_libs: [
         "liblog",
+        "libprocessgroup",
     ],
 }
 
@@ -59,6 +65,7 @@
     recovery_available: true,
     shared_libs: [
         "libcutils",
+        "libprocessgroup",
     ],
     export_include_dirs: ["include"],
     cppflags: [
diff --git a/event.logtags b/event.logtags
index 5382b49..dcab73c 100644
--- a/event.logtags
+++ b/event.logtags
@@ -21,6 +21,7 @@
 # 2: int64_t
 # 3: string
 # 4: list
+# 5: float
 #
 # The data unit is a number taken from the following list:
 # 1: Number of objects
@@ -35,4 +36,4 @@
 # TODO: generate ".java" and ".h" files with integer constants from this file.
 
 # for killinfo logs
-10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1),(MsSinceEvent|1),(MsSincePrevWakeup|1),(WakeupsSinceEvent|1),(SkippedWakeups|1),(TaskSwapSize|1),(GPU|1)
+10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1),(MsSinceEvent|1),(MsSincePrevWakeup|1),(WakeupsSinceEvent|1),(SkippedWakeups|1),(TaskSwapSize|1),(GPU|1),(Thrashing|1),(MaxThrashing|1),(PsiMemSome|5),(PsiMemFull|5),(PsiIoSome|5),(PsiIoFull|5),(PsiCpuSome|5)
diff --git a/liblmkd_utils.cpp b/liblmkd_utils.cpp
index 55d7f62..e5e99de 100644
--- a/liblmkd_utils.cpp
+++ b/liblmkd_utils.cpp
@@ -22,8 +22,9 @@
 #include <stdio.h>
 #include <unistd.h>
 
-#include <liblmkd_utils.h>
 #include <cutils/sockets.h>
+#include <liblmkd_utils.h>
+#include <processgroup/processgroup.h>
 
 int lmkd_connect() {
     return socket_local_client("lmkd",
@@ -78,34 +79,6 @@
 }
 
 int create_memcg(uid_t uid, pid_t pid) {
-    char buf[256];
-    int tasks_file;
-    int written;
-
-    snprintf(buf, sizeof(buf), "/dev/memcg/apps/uid_%u", uid);
-    if (mkdir(buf, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) < 0 &&
-        errno != EEXIST) {
-        return -1;
-    }
-
-    snprintf(buf, sizeof(buf), "/dev/memcg/apps/uid_%u/pid_%u", uid, pid);
-    if (mkdir(buf, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) < 0 &&
-        errno != EEXIST) {
-        return -1;
-    }
-
-    snprintf(buf, sizeof(buf), "/dev/memcg/apps/uid_%u/pid_%u/tasks", uid, pid);
-    tasks_file = open(buf, O_WRONLY);
-    if (tasks_file < 0) {
-        return -2;
-    }
-    written = snprintf(buf, sizeof(buf), "%u", pid);
-    if (__predict_false(written >= (int)sizeof(buf))) {
-        written = sizeof(buf) - 1;
-    }
-    written = TEMP_FAILURE_RETRY(write(tasks_file, buf, written));
-    close(tasks_file);
-
-    return (written < 0) ? -3 : 0;
+    return createProcessGroup(uid, pid, true) == 0 ? 0 : -1;
 }
 
diff --git a/libpsi/include/psi/psi.h b/libpsi/include/psi/psi.h
index cd49e8b..9162446 100644
--- a/libpsi/include/psi/psi.h
+++ b/libpsi/include/psi/psi.h
@@ -22,12 +22,29 @@
 
 __BEGIN_DECLS
 
+#define PSI_PATH_MEMORY	"/proc/pressure/memory"
+#define PSI_PATH_IO	"/proc/pressure/io"
+#define PSI_PATH_CPU	"/proc/pressure/cpu"
+
 enum psi_stall_type {
     PSI_SOME,
     PSI_FULL,
     PSI_TYPE_COUNT
 };
 
+struct psi_stats {
+    float avg10;
+    float avg60;
+    float avg300;
+    unsigned long total;
+};
+
+struct psi_data {
+    struct psi_stats mem_stats[PSI_TYPE_COUNT];
+    struct psi_stats io_stats[PSI_TYPE_COUNT];
+    struct psi_stats cpu_stats[PSI_TYPE_COUNT];
+};
+
 /*
  * Initializes psi monitor.
  * stall_type, threshold_us and window_us are monitor parameters
@@ -63,6 +80,13 @@
  */
 void destroy_psi_monitor(int fd);
 
+/*
+ * Parse psi file line content. Expected file format is:
+ *    some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ *    full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ */
+int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]);
+
 __END_DECLS
 
 #endif  // __ANDROID_PSI_H__
diff --git a/libpsi/psi.cpp b/libpsi/psi.cpp
index 89f07ed..54f9971 100644
--- a/libpsi/psi.cpp
+++ b/libpsi/psi.cpp
@@ -28,8 +28,6 @@
 #include <stdio.h>
 #include "psi/psi.h"
 
-#define PSI_MON_FILE_MEMORY "/proc/pressure/memory"
-
 static const char* stall_type_name[] = {
         "some",
         "full",
@@ -41,7 +39,7 @@
     int res;
     char buf[256];
 
-    fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC));
+    fd = TEMP_FAILURE_RETRY(open(PSI_PATH_MEMORY, O_WRONLY | O_CLOEXEC));
     if (fd < 0) {
         ALOGE("No kernel psi monitor support (errno=%d)", errno);
         return -1;
@@ -61,7 +59,7 @@
 
     if (res >= (ssize_t)sizeof(buf)) {
         ALOGE("%s line overflow for psi stall type '%s'",
-            PSI_MON_FILE_MEMORY, stall_type_name[stall_type]);
+            PSI_PATH_MEMORY, stall_type_name[stall_type]);
         errno = EINVAL;
         goto err;
     }
@@ -69,7 +67,7 @@
     res = TEMP_FAILURE_RETRY(write(fd, buf, strlen(buf) + 1));
     if (res < 0) {
         ALOGE("%s write failed for psi stall type '%s'; errno=%d",
-            PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno);
+            PSI_PATH_MEMORY, stall_type_name[stall_type], errno);
         goto err;
     }
 
@@ -102,3 +100,17 @@
         close(fd);
     }
 }
+
+int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]) {
+    char type_name[5];
+    struct psi_stats *stat = &stats[stall_type];
+
+    if (!line || sscanf(line, "%4s avg10=%f avg60=%f avg300=%f total=%lu",
+        type_name, &stat->avg10, &stat->avg60, &stat->avg300, &stat->total) != 5) {
+        return -1;
+    }
+    if (strcmp(type_name, stall_type_name[stall_type])) {
+        return -1;
+    }
+    return 0;
+}
diff --git a/lmkd.cpp b/lmkd.cpp
index 68f9729..741e891 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -16,12 +16,10 @@
 
 #define LOG_TAG "lowmemorykiller"
 
-#include <dirent.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <pwd.h>
 #include <sched.h>
-#include <signal.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
@@ -30,17 +28,17 @@
 #include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <sys/pidfd.h>
-#include <sys/resource.h>
 #include <sys/socket.h>
 #include <sys/syscall.h>
 #include <sys/sysinfo.h>
-#include <sys/time.h>
-#include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
 
+#include <algorithm>
+#include <array>
+#include <shared_mutex>
+
 #include <cutils/properties.h>
-#include <cutils/sched_policy.h>
 #include <cutils/sockets.h>
 #include <liblmkd_utils.h>
 #include <lmkd.h>
@@ -48,10 +46,12 @@
 #include <log/log_event_list.h>
 #include <log/log_time.h>
 #include <private/android_filesystem_config.h>
+#include <processgroup/processgroup.h>
 #include <psi/psi.h>
-#include <system/thread_defs.h>
 
+#include "reaper.h"
 #include "statslog.h"
+#include "watchdog.h"
 
 #define BPF_FD_JUST_USE_INT
 #include "BpfSyscallWrappers.h"
@@ -65,13 +65,20 @@
 #define ATRACE_TAG ATRACE_TAG_ALWAYS
 #include <cutils/trace.h>
 
-#define TRACE_KILL_START(pid) ATRACE_INT(__FUNCTION__, pid);
-#define TRACE_KILL_END()      ATRACE_INT(__FUNCTION__, 0);
+static inline void trace_kill_start(int pid, const char *desc) {
+    ATRACE_INT("kill_one_process", pid);
+    ATRACE_BEGIN(desc);
+}
+
+static inline void trace_kill_end() {
+    ATRACE_END();
+    ATRACE_INT("kill_one_process", 0);
+}
 
 #else /* LMKD_TRACE_KILLS */
 
-#define TRACE_KILL_START(pid) ((void)(pid))
-#define TRACE_KILL_END() ((void)0)
+static inline void trace_kill_start(int, const char *) {}
+static inline void trace_kill_end() {}
 
 #endif /* LMKD_TRACE_KILLS */
 
@@ -79,9 +86,6 @@
 #define __unused __attribute__((__unused__))
 #endif
 
-#define MEMCG_SYSFS_PATH "/dev/memcg/"
-#define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
-#define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"
 #define ZONEINFO_PATH "/proc/zoneinfo"
 #define MEMINFO_PATH "/proc/meminfo"
 #define VMSTAT_PATH "/proc/vmstat"
@@ -99,7 +103,6 @@
 #define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
 #define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"
 
-#define ARRAY_SIZE(x)   (sizeof(x) / sizeof(*(x)))
 #define EIGHT_MEGA (1 << 23)
 
 #define TARGET_UPDATE_MIN_INTERVAL_MS 1000
@@ -136,9 +139,6 @@
 /* Polling period after PSI signal when pressure is low */
 #define PSI_POLL_PERIOD_LONG_MS 100
 
-#define min(a, b) (((a) < (b)) ? (a) : (b))
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-
 #define FAIL_REPORT_RLIMIT_MS 1000
 
 /*
@@ -160,6 +160,8 @@
 
 #define LMKD_REINIT_PROP "lmkd.reinit"
 
+#define WATCHDOG_TIMEOUT_SEC 2
+
 /* default to old in-kernel interface if no memory pressure events */
 static bool use_inkernel_interface = true;
 static bool has_inkernel_module;
@@ -212,6 +214,7 @@
 static int thrashing_critical_pct;
 static int swap_util_max;
 static int64_t filecache_min_kb;
+static int64_t stall_limit_critical;
 static bool use_psi_monitors = false;
 static int kpoll_fd;
 static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
@@ -221,6 +224,8 @@
 };
 
 static android_log_context ctx;
+static Reaper reaper;
+static int reaper_comm_fd[2];
 
 enum polling_update {
     POLLING_DO_NOT_CHANGE,
@@ -270,9 +275,9 @@
 
 /*
  * 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels,
- * 1 lmk events + 1 fd to wait for process death
+ * 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications
  */
-#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
+#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1)
 static int epollfd;
 static int maxevents;
 
@@ -280,8 +285,8 @@
 #define OOM_SCORE_ADJ_MIN       (-1000)
 #define OOM_SCORE_ADJ_MAX       1000
 
-static int lowmem_adj[MAX_TARGETS];
-static int lowmem_minfree[MAX_TARGETS];
+static std::array<int, MAX_TARGETS> lowmem_adj;
+static std::array<int, MAX_TARGETS> lowmem_minfree;
 static int lowmem_targets_size;
 
 /* Fields to parse in /proc/zoneinfo */
@@ -520,6 +525,11 @@
 
 #define ADJTOSLOT(adj) ((adj) + -OOM_SCORE_ADJ_MIN)
 #define ADJTOSLOT_COUNT (ADJTOSLOT(OOM_SCORE_ADJ_MAX) + 1)
+
+// protects procadjslot_list from concurrent access
+static std::shared_mutex adjslot_list_lock;
+// procadjslot_list should be modified only from the main thread while exclusively holding
+// adjslot_list_lock. Readers from non-main threads should hold adjslot_list_lock shared lock.
 static struct adjslot_list procadjslot_list[ADJTOSLOT_COUNT];
 
 #define MAX_DISTINCT_OOM_ADJ 32
@@ -542,7 +552,7 @@
 static void destroy_monitors();
 
 static int clamp(int low, int high, int value) {
-    return max(min(value, high), low);
+    return std::max(std::min(value, high), low);
 }
 
 static bool parse_int64(const char* str, int64_t* ret) {
@@ -823,7 +833,7 @@
 
     while (1) {
         char rd_buf[256];
-        int bytes_read = TEMP_FAILURE_RETRY(pread(poll_fd, (void*)rd_buf, sizeof(rd_buf), 0));
+        int bytes_read = TEMP_FAILURE_RETRY(pread(poll_fd, (void*)rd_buf, sizeof(rd_buf) - 1, 0));
         if (bytes_read <= 0) break;
         rd_buf[bytes_read] = '\0';
 
@@ -909,13 +919,18 @@
     return asl == head ? NULL : asl;
 }
 
+// Modifies procadjslot_list; should be called only from the main thread.
 static void proc_slot(struct proc *procp) {
     int adjslot = ADJTOSLOT(procp->oomadj);
+    std::scoped_lock lock(adjslot_list_lock);
 
     adjslot_insert(&procadjslot_list[adjslot], &procp->asl);
 }
 
+// Modifies procadjslot_list; should be called only from the main thread.
 static void proc_unslot(struct proc *procp) {
+    std::scoped_lock lock(adjslot_list_lock);
+
     adjslot_remove(&procp->asl);
 }
 
@@ -1165,9 +1180,12 @@
             soft_limit_mult = 64;
         }
 
-        snprintf(path, sizeof(path), MEMCG_SYSFS_PATH
-                 "apps/uid_%d/pid_%d/memory.soft_limit_in_bytes",
-                 params.uid, params.pid);
+        std::string path;
+        if (!CgroupGetAttributePathForTask("MemSoftLimit", params.pid, &path)) {
+            ALOGE("Querying MemSoftLimit path failed");
+            return;
+        }
+
         snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);
 
         /*
@@ -1177,7 +1195,7 @@
         is_system_server = (params.oomadj == SYSTEM_ADJ &&
                             (pwdrec = getpwnam("system")) != NULL &&
                             params.uid == pwdrec->pw_uid);
-        writefilestring(path, val, !is_system_server);
+        writefilestring(path.c_str(), val, !is_system_server);
     }
 
     procp = pid_lookup(params.pid);
@@ -1359,8 +1377,9 @@
     static struct timespec last_req_tm;
     struct timespec curr_tm;
 
-    if (ntargets < 1 || ntargets > (int)ARRAY_SIZE(lowmem_adj))
+    if (ntargets < 1 || ntargets > (int)lowmem_adj.size()) {
         return;
+    }
 
     /*
      * Ratelimit minfree updates to once per TARGET_UPDATE_MIN_INTERVAL_MS
@@ -1452,8 +1471,9 @@
     switch(cmd) {
     case LMK_TARGET:
         targets = nargs / 2;
-        if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
+        if (nargs & 0x1 || targets > (int)lowmem_adj.size()) {
             goto wronglen;
+        }
         cmd_target(targets, packet);
         break;
     case LMK_PROCPRIO:
@@ -1897,6 +1917,53 @@
     return 0;
 }
 
+static int psi_parse(struct reread_data *file_data, struct psi_stats stats[], bool full) {
+    char *buf;
+    char *save_ptr;
+    char *line;
+
+    if ((buf = reread_file(file_data)) == NULL) {
+        return -1;
+    }
+
+    line = strtok_r(buf, "\n", &save_ptr);
+    if (parse_psi_line(line, PSI_SOME, stats)) {
+        return -1;
+    }
+    if (full) {
+        line = strtok_r(NULL, "\n", &save_ptr);
+        if (parse_psi_line(line, PSI_FULL, stats)) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int psi_parse_mem(struct psi_data *psi_data) {
+    static struct reread_data file_data = {
+        .filename = PSI_PATH_MEMORY,
+        .fd = -1,
+    };
+    return psi_parse(&file_data, psi_data->mem_stats, true);
+}
+
+static int psi_parse_io(struct psi_data *psi_data) {
+    static struct reread_data file_data = {
+        .filename = PSI_PATH_IO,
+        .fd = -1,
+    };
+    return psi_parse(&file_data, psi_data->io_stats, true);
+}
+
+static int psi_parse_cpu(struct psi_data *psi_data) {
+    static struct reread_data file_data = {
+        .filename = PSI_PATH_CPU,
+        .fd = -1,
+    };
+    return psi_parse(&file_data, psi_data->cpu_stats, false);
+}
+
 enum wakeup_reason {
     Event,
     Polling
@@ -1932,38 +1999,96 @@
     }
 }
 
+struct kill_info {
+    enum kill_reasons kill_reason;
+    const char *kill_desc;
+    int thrashing;
+    int max_thrashing;
+};
+
 static void killinfo_log(struct proc* procp, int min_oom_score, int rss_kb,
-                         int swap_kb, int kill_reason, union meminfo *mi,
-                         struct wakeup_info *wi, struct timespec *tm) {
+                         int swap_kb, struct kill_info *ki, union meminfo *mi,
+                         struct wakeup_info *wi, struct timespec *tm, struct psi_data *pd) {
     /* log process information */
     android_log_write_int32(ctx, procp->pid);
     android_log_write_int32(ctx, procp->uid);
     android_log_write_int32(ctx, procp->oomadj);
     android_log_write_int32(ctx, min_oom_score);
-    android_log_write_int32(ctx, (int32_t)min(rss_kb, INT32_MAX));
-    android_log_write_int32(ctx, kill_reason);
+    android_log_write_int32(ctx, std::min(rss_kb, (int)INT32_MAX));
+    android_log_write_int32(ctx, ki ? ki->kill_reason : NONE);
 
     /* log meminfo fields */
     for (int field_idx = 0; field_idx < MI_FIELD_COUNT; field_idx++) {
-        android_log_write_int32(ctx, (int32_t)min(mi->arr[field_idx] * page_k, INT32_MAX));
+        android_log_write_int32(ctx,
+                                mi ? std::min(mi->arr[field_idx] * page_k, (int64_t)INT32_MAX) : 0);
     }
 
     /* log lmkd wakeup information */
-    android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->last_event_tm, tm));
-    android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->prev_wakeup_tm, tm));
-    android_log_write_int32(ctx, wi->wakeups_since_event);
-    android_log_write_int32(ctx, wi->skipped_wakeups);
-    android_log_write_int32(ctx, (int32_t)min(swap_kb, INT32_MAX));
-    android_log_write_int32(ctx, (int32_t)mi->field.total_gpu_kb);
+    if (wi) {
+        android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->last_event_tm, tm));
+        android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->prev_wakeup_tm, tm));
+        android_log_write_int32(ctx, wi->wakeups_since_event);
+        android_log_write_int32(ctx, wi->skipped_wakeups);
+    } else {
+        android_log_write_int32(ctx, 0);
+        android_log_write_int32(ctx, 0);
+        android_log_write_int32(ctx, 0);
+        android_log_write_int32(ctx, 0);
+    }
+
+    android_log_write_int32(ctx, std::min(swap_kb, (int)INT32_MAX));
+    android_log_write_int32(ctx, mi ? (int32_t)mi->field.total_gpu_kb : 0);
+    if (ki) {
+        android_log_write_int32(ctx, ki->thrashing);
+        android_log_write_int32(ctx, ki->max_thrashing);
+    } else {
+        android_log_write_int32(ctx, 0);
+        android_log_write_int32(ctx, 0);
+    }
+
+    if (pd) {
+        android_log_write_float32(ctx, pd->mem_stats[PSI_SOME].avg10);
+        android_log_write_float32(ctx, pd->mem_stats[PSI_FULL].avg10);
+        android_log_write_float32(ctx, pd->io_stats[PSI_SOME].avg10);
+        android_log_write_float32(ctx, pd->io_stats[PSI_FULL].avg10);
+        android_log_write_float32(ctx, pd->cpu_stats[PSI_SOME].avg10);
+    } else {
+        for (int i = 0; i < 5; i++) {
+            android_log_write_float32(ctx, 0);
+        }
+    }
 
     android_log_write_list(ctx, LOG_ID_EVENTS);
     android_log_reset(ctx);
 }
 
-static struct proc *proc_adj_lru(int oomadj) {
+// Note: the returned entry is only an anchor and does not hold valid process info.
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
+static struct proc *proc_adj_head(int oomadj) {
+    return (struct proc *)&procadjslot_list[ADJTOSLOT(oomadj)];
+}
+
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
+static struct proc *proc_adj_tail(int oomadj) {
     return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
 }
 
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
+static struct proc *proc_adj_prev(int oomadj, int pid) {
+    struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)];
+    struct adjslot_list *curr = adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
+
+    while (curr != head) {
+        if (((struct proc *)curr)->pid == pid) {
+            return (struct proc *)curr->prev;
+        }
+        curr = curr->prev;
+    }
+
+    return NULL;
+}
+
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
 static struct proc *proc_get_heaviest(int oomadj) {
     struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)];
     struct adjslot_list *curr = head->next;
@@ -1987,41 +2112,55 @@
     return maxprocp;
 }
 
-static void set_process_group_and_prio(int pid, SchedPolicy sp, int prio) {
-    DIR* d;
-    char proc_path[PATH_MAX];
-    struct dirent* de;
+static bool find_victim(int oom_score, int prev_pid, struct proc &target_proc) {
+    struct proc *procp;
+    std::shared_lock lock(adjslot_list_lock);
 
-    snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
-    if (!(d = opendir(proc_path))) {
-        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
-              pid);
-        return;
-    }
-
-    while ((de = readdir(d))) {
-        int t_pid;
-
-        if (de->d_name[0] == '.') continue;
-        t_pid = atoi(de->d_name);
-
-        if (!t_pid) {
-            ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
-            continue;
-        }
-
-        if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
-            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
-        }
-
-        if (set_cpuset_policy(t_pid, sp)) {
-            ALOGW("Failed to set_cpuset_policy on pid(%d) t_pid(%d) to %d", pid, t_pid, (int)sp);
-            continue;
+    if (!prev_pid) {
+        procp = proc_adj_tail(oom_score);
+    } else {
+        procp = proc_adj_prev(oom_score, prev_pid);
+        if (!procp) {
+            // pid was removed, restart at the tail
+            procp = proc_adj_tail(oom_score);
         }
     }
-    closedir(d);
+
+    // the list is empty at this oom_score or we looped through it
+    if (!procp || procp == proc_adj_head(oom_score)) {
+        return false;
+    }
+
+    // make a copy because original might be destroyed after adjslot_list_lock is released
+    target_proc = *procp;
+
+    return true;
 }
 
+static void watchdog_callback() {
+    int prev_pid = 0;
+
+    ALOGW("lmkd watchdog timed out!");
+    for (int oom_score = OOM_SCORE_ADJ_MAX; oom_score >= 0;) {
+        struct proc target;
+
+        if (!find_victim(oom_score, prev_pid, target)) {
+            oom_score--;
+            prev_pid = 0;
+            continue;
+        }
+
+        if (reaper.kill({ target.pidfd, target.pid, target.uid }, true) == 0) {
+            ALOGW("lmkd watchdog killed process %d, oom_score_adj %d", target.pid, oom_score);
+            killinfo_log(&target, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+            break;
+        }
+        prev_pid = target.pid;
+    }
+}
+
+static Watchdog watchdog(WATCHDOG_TIMEOUT_SEC, watchdog_callback);
+
 static bool is_kill_pending(void) {
     char buf[24];
 
@@ -2092,6 +2231,19 @@
     poll_params->update = POLLING_RESUME;
 }
 
+static void kill_fail_handler(int data __unused, uint32_t events __unused,
+                              struct polling_params *poll_params) {
+    int pid;
+
+    // Extract pid from the communication pipe. Clearing the pipe this way allows further
+    // epoll_wait calls to sleep until the next event.
+    if (TEMP_FAILURE_RETRY(read(reaper_comm_fd[0], &pid, sizeof(pid))) != sizeof(pid)) {
+        ALOGE("thread communication read failed: %s", strerror(errno));
+    }
+    stop_wait_for_proc_kill(false);
+    poll_params->update = POLLING_RESUME;
+}
+
 static void start_wait_for_proc_kill(int pid_or_fd) {
     static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
     struct epoll_event epev;
@@ -2120,21 +2272,15 @@
     maxevents++;
 }
 
-struct kill_info {
-    enum kill_reasons kill_reason;
-    const char *kill_desc;
-    int thrashing;
-    int max_thrashing;
-};
-
 /* Kill one process specified by procp.  Returns the size (in pages) of the process killed */
 static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_info *ki,
-                            union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
+                            union meminfo *mi, struct wakeup_info *wi, struct timespec *tm,
+                            struct psi_data *pd) {
     int pid = procp->pid;
     int pidfd = procp->pidfd;
     uid_t uid = procp->uid;
     char *taskname;
-    int r;
+    int kill_result;
     int result = -1;
     struct memory_stat *mem_st;
     struct kill_stat kill_st;
@@ -2142,6 +2288,7 @@
     int64_t rss_kb;
     int64_t swap_kb;
     char buf[PAGE_SIZE];
+    char desc[LINE_MAX];
 
     if (!read_proc_status(pid, buf, sizeof(buf))) {
         goto out;
@@ -2170,28 +2317,23 @@
 
     mem_st = stats_read_memory_stat(per_app_memcg, pid, uid, rss_kb * 1024, swap_kb * 1024);
 
-    TRACE_KILL_START(pid);
+    snprintf(desc, sizeof(desc), "lmk,%d,%d,%d,%d,%d", pid, ki ? (int)ki->kill_reason : -1,
+             procp->oomadj, min_oom_score, ki ? ki->max_thrashing : -1);
 
-    /* CAP_KILL required */
-    if (pidfd < 0) {
-        start_wait_for_proc_kill(pid);
-        r = kill(pid, SIGKILL);
-    } else {
-        start_wait_for_proc_kill(pidfd);
-        r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
-    }
+    trace_kill_start(pid, desc);
 
-    TRACE_KILL_END();
+    start_wait_for_proc_kill(pidfd < 0 ? pid : pidfd);
+    kill_result = reaper.kill({ pidfd, pid, uid }, false);
 
-    if (r) {
+    trace_kill_end();
+
+    if (kill_result) {
         stop_wait_for_proc_kill(false);
         ALOGE("kill(%d): errno=%d", pid, errno);
         /* Delete process record even when we fail to kill so that we don't get stuck on it */
         goto out;
     }
 
-    set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);
-
     last_kill_tm = *tm;
 
     inc_killcnt(procp->oomadj);
@@ -2200,7 +2342,6 @@
         kill_st.kill_reason = ki->kill_reason;
         kill_st.thrashing = ki->thrashing;
         kill_st.max_thrashing = ki->max_thrashing;
-        killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki->kill_reason, mi, wi, tm);
         ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
               "kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb,
               ki->kill_desc);
@@ -2208,10 +2349,10 @@
         kill_st.kill_reason = NONE;
         kill_st.thrashing = 0;
         kill_st.max_thrashing = 0;
-        killinfo_log(procp, min_oom_score, rss_kb, swap_kb, NONE, mi, wi, tm);
         ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
               "kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb);
     }
+    killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki, mi, wi, tm, pd);
 
     kill_st.uid = static_cast<int32_t>(uid);
     kill_st.taskname = taskname;
@@ -2239,7 +2380,8 @@
  * Returns size of the killed process.
  */
 static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
-                                 struct wakeup_info *wi, struct timespec *tm) {
+                                 struct wakeup_info *wi, struct timespec *tm,
+                                 struct psi_data *pd) {
     int i;
     int killed_size = 0;
     bool lmk_state_change_start = false;
@@ -2258,12 +2400,12 @@
 
         while (true) {
             procp = choose_heaviest_task ?
-                proc_get_heaviest(i) : proc_adj_lru(i);
+                proc_get_heaviest(i) : proc_adj_tail(i);
 
             if (!procp)
                 break;
 
-            killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm);
+            killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm, pd);
             if (killed_size >= 0) {
                 if (!lmk_state_change_start) {
                     lmk_state_change_start = true;
@@ -2410,7 +2552,6 @@
     static int64_t base_file_lru;
     static int64_t init_pgscan_kswapd;
     static int64_t init_pgscan_direct;
-    static int64_t swap_low_threshold;
     static bool killing;
     static int thrashing_limit = thrashing_limit_pct;
     static struct zone_watermarks watermarks;
@@ -2423,6 +2564,7 @@
 
     union meminfo mi;
     union vmstat vs;
+    struct psi_data psi_data;
     struct timespec curr_tm;
     int64_t thrashing = 0;
     bool swap_is_low = false;
@@ -2435,8 +2577,10 @@
     bool cut_thrashing_limit = false;
     int min_score_adj = 0;
     int swap_util = 0;
+    int64_t swap_low_threshold;
     long since_thrashing_reset_ms;
     int64_t workingset_refault_file;
+    bool critical_stall = false;
 
     if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
         ALOGE("Failed to get current time");
@@ -2483,10 +2627,10 @@
 
     /* Check free swap levels */
     if (swap_free_low_percentage) {
-        if (!swap_low_threshold) {
-            swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100;
-        }
+        swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100;
         swap_is_low = mi.field.free_swap < swap_low_threshold;
+    } else {
+        swap_low_threshold = 0;
     }
 
     /* Identify reclaim state */
@@ -2569,6 +2713,9 @@
     /* Find out which watermark is breached if any */
     wmark = get_lowest_watermark(&mi, &watermarks);
 
+    if (!psi_parse_mem(&psi_data)) {
+        critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical;
+    }
     /*
      * TODO: move this logic into a separate function
      * Decide if killing a process is necessary and record the reason
@@ -2666,7 +2813,14 @@
             .thrashing = (int)thrashing,
             .max_thrashing = max_thrashing,
         };
-        int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm);
+
+        /* Allow killing perceptible apps if the system is stalled */
+        if (critical_stall) {
+            min_score_adj = 0;
+        }
+        psi_parse_io(&psi_data);
+        psi_parse_cpu(&psi_data);
+        int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm, &psi_data);
         if (pages_freed > 0) {
             killing = true;
             max_thrashing = 0;
@@ -2708,6 +2862,16 @@
     }
 }
 
+static std::string GetCgroupAttributePath(const char* attr) {
+    std::string path;
+    if (!CgroupGetAttributePath(attr, &path)) {
+        ALOGE("Unknown cgroup attribute %s", attr);
+    }
+    return path;
+}
+
+// The implementation of this function relies on memcg statistics that are only available in the
+// v1 cgroup hierarchy.
 static void mp_event_common(int data, uint32_t events, struct polling_params *poll_params) {
     unsigned long long evcount;
     int64_t mem_usage, memsw_usage;
@@ -2720,12 +2884,14 @@
     long other_free = 0, other_file = 0;
     int min_score_adj;
     int minfree = 0;
+    static const std::string mem_usage_path = GetCgroupAttributePath("MemUsage");
     static struct reread_data mem_usage_file_data = {
-        .filename = MEMCG_MEMORY_USAGE,
+        .filename = mem_usage_path.c_str(),
         .fd = -1,
     };
+    static const std::string memsw_usage_path = GetCgroupAttributePath("MemAndSwapUsage");
     static struct reread_data memsw_usage_file_data = {
-        .filename = MEMCG_MEMORYSW_USAGE,
+        .filename = memsw_usage_path.c_str(),
         .fd = -1,
     };
     static struct wakeup_info wi;
@@ -2822,7 +2988,7 @@
         }
 
         if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
-            if (debug_process_killing) {
+            if (debug_process_killing && lowmem_targets_size) {
                 ALOGI("Ignore %s memory pressure event "
                       "(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
                       level_name[level], other_free * page_k, other_file * page_k,
@@ -2886,7 +3052,7 @@
 do_kill:
     if (low_ram_device) {
         /* For Go devices kill only one task */
-        if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm) == 0) {
+        if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm, NULL) == 0) {
             if (debug_process_killing) {
                 ALOGI("Nothing to kill");
             }
@@ -2909,7 +3075,7 @@
             min_score_adj = level_oomadj[level];
         }
 
-        pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm);
+        pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm, NULL);
 
         if (pages_freed == 0) {
             /* Rate limit kill reports when nothing was reclaimed */
@@ -2988,14 +3154,44 @@
     mpevfd[level] = -1;
 }
 
+enum class MemcgVersion {
+    kNotFound,
+    kV1,
+    kV2,
+};
+
+static MemcgVersion __memcg_version() {
+    std::string cgroupv2_path, memcg_path;
+
+    if (!CgroupGetControllerPath("memory", &memcg_path)) {
+        return MemcgVersion::kNotFound;
+    }
+    return CgroupGetControllerPath(CGROUPV2_CONTROLLER_NAME, &cgroupv2_path) &&
+                           cgroupv2_path == memcg_path
+                   ? MemcgVersion::kV2
+                   : MemcgVersion::kV1;
+}
+
+static MemcgVersion memcg_version() {
+    static MemcgVersion version = __memcg_version();
+
+    return version;
+}
+
 static bool init_psi_monitors() {
     /*
      * When PSI is used on low-ram devices or on high-end devices without memfree levels
-     * use new kill strategy based on zone watermarks, free swap and thrashing stats
+     * use new kill strategy based on zone watermarks, free swap and thrashing stats.
+     * Also use the new strategy if memcg has not been mounted in the v1 cgroups hierarchy since
+     * the old strategy relies on memcg attributes that are available only in the v1 cgroups
+     * hierarchy.
      */
     bool use_new_strategy =
         GET_LMK_PROPERTY(bool, "use_new_strategy", low_ram_device || !use_minfree_levels);
-
+    if (!use_new_strategy && memcg_version() != MemcgVersion::kV1) {
+        ALOGE("Old kill strategy can only be used with v1 cgroup hierarchy");
+        return false;
+    }
     /* In default PSI mode override stall amounts using system properties */
     if (use_new_strategy) {
         /* Do not use low pressure level */
@@ -3020,6 +3216,13 @@
 }
 
 static bool init_mp_common(enum vmpressure_level level) {
+    // The implementation of this function relies on memcg statistics that are only available in the
+    // v1 cgroup hierarchy.
+    if (memcg_version() != MemcgVersion::kV1) {
+        ALOGE("%s: global monitoring is only available for the v1 cgroup hierarchy", __func__);
+        return false;
+    }
+
     int mpfd;
     int evfd;
     int evctlfd;
@@ -3030,13 +3233,13 @@
     const char *levelstr = level_name[level_idx];
 
     /* gid containing AID_SYSTEM required */
-    mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
+    mpfd = open(GetCgroupAttributePath("MemPressureLevel").c_str(), O_RDONLY | O_CLOEXEC);
     if (mpfd < 0) {
         ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
         goto err_open_mpfd;
     }
 
-    evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC);
+    evctlfd = open(GetCgroupAttributePath("CgroupEventControl").c_str(), O_WRONLY | O_CLOEXEC);
     if (evctlfd < 0) {
         ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
         goto err_open_evctlfd;
@@ -3140,6 +3343,63 @@
     }
 }
 
+static void drop_reaper_comm() {
+    close(reaper_comm_fd[0]);
+    close(reaper_comm_fd[1]);
+}
+
+static bool setup_reaper_comm() {  // creates the reaper->main failure pipe; false on error
+    if (pipe(reaper_comm_fd)) {
+        ALOGE("pipe failed: %s", strerror(errno));
+        return false;
+    }
+
+    // Ensure main thread never blocks on read; F_GETFL itself may fail and must be checked
+    int flags = fcntl(reaper_comm_fd[0], F_GETFL);
+    if (flags == -1 || fcntl(reaper_comm_fd[0], F_SETFL, flags | O_NONBLOCK)) {
+        ALOGE("fcntl failed: %s", strerror(errno));
+        drop_reaper_comm();
+        return false;
+    }
+
+    return true;
+}
+
+static bool init_reaper() {
+    if (!reaper.is_reaping_supported()) {
+        ALOGI("Process reaping is not supported");
+        return false;
+    }
+
+    if (!setup_reaper_comm()) {
+        ALOGE("Failed to create thread communication channel");
+        return false;
+    }
+
+    // Setup epoll handler
+    struct epoll_event epev;
+    static struct event_handler_info kill_failed_hinfo = { 0, kill_fail_handler };
+    epev.events = EPOLLIN;
+    epev.data.ptr = (void *)&kill_failed_hinfo;
+    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, reaper_comm_fd[0], &epev)) {
+        ALOGE("epoll_ctl failed: %s", strerror(errno));
+        drop_reaper_comm();
+        return false;
+    }
+
+    if (!reaper.init(reaper_comm_fd[1])) {
+        ALOGE("Failed to initialize reaper object");
+        if (epoll_ctl(epollfd, EPOLL_CTL_DEL, reaper_comm_fd[0], &epev)) {
+            ALOGE("epoll_ctl failed: %s", strerror(errno));
+        }
+        drop_reaper_comm();
+        return false;
+    }
+    maxevents++;
+
+    return true;
+}
+
 static int init(void) {
     static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };
     struct reread_data file_data = {
@@ -3257,6 +3517,7 @@
                          struct polling_params *poll_params, uint32_t events) {
     struct timespec curr_tm;
 
+    watchdog.start();
     poll_params->update = POLLING_DO_NOT_CHANGE;
     handler_info->handler(handler_info->data, events, poll_params);
     clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
@@ -3288,6 +3549,7 @@
         }
         break;
     }
+    watchdog.stop();
 }
 
 static void mainloop(void) {
@@ -3369,7 +3631,9 @@
             if ((evt->events & EPOLLHUP) && evt->data.ptr) {
                 ALOGI("lmkd data connection dropped");
                 handler_info = (struct event_handler_info*)evt->data.ptr;
+                watchdog.start();
                 ctrl_data_close(handler_info->data);
+                watchdog.stop();
             }
         }
 
@@ -3454,14 +3718,18 @@
         low_ram_device ? DEF_PARTIAL_STALL_LOWRAM : DEF_PARTIAL_STALL);
     psi_complete_stall_ms = GET_LMK_PROPERTY(int32, "psi_complete_stall_ms",
         DEF_COMPLETE_STALL);
-    thrashing_limit_pct = max(0, GET_LMK_PROPERTY(int32, "thrashing_limit",
-        low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
+    thrashing_limit_pct =
+            std::max(0, GET_LMK_PROPERTY(int32, "thrashing_limit",
+                                         low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
     thrashing_limit_decay_pct = clamp(0, 100, GET_LMK_PROPERTY(int32, "thrashing_limit_decay",
         low_ram_device ? DEF_THRASHING_DECAY_LOWRAM : DEF_THRASHING_DECAY));
-    thrashing_critical_pct = max(0, GET_LMK_PROPERTY(int32, "thrashing_limit_critical",
-        thrashing_limit_pct * 2));
+    thrashing_critical_pct = std::max(
+            0, GET_LMK_PROPERTY(int32, "thrashing_limit_critical", thrashing_limit_pct * 2));
     swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100));
     filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
+    stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100);
+
+    reaper.enable_debug(debug_process_killing);
 }
 
 int main(int argc, char **argv) {
@@ -3503,6 +3771,15 @@
             }
         }
 
+        if (init_reaper()) {
+            ALOGI("Process reaper initialized with %d threads in the pool",
+                reaper.thread_cnt());
+        }
+
+        if (!watchdog.init()) {
+            ALOGE("Failed to initialize the watchdog");
+        }
+
         mainloop();
     }
 
diff --git a/lmkd.rc b/lmkd.rc
index 6f90bcb..ba662b4 100644
--- a/lmkd.rc
+++ b/lmkd.rc
@@ -5,7 +5,7 @@
     capabilities DAC_OVERRIDE KILL IPC_LOCK SYS_NICE SYS_RESOURCE
     critical
     socket lmkd seqpacket+passcred 0660 system system
-    writepid /dev/cpuset/system-background/tasks
+    task_profiles ServiceCapacityLow
 
 on property:lmkd.reinit=1
     exec_background /system/bin/lmkd --reinit
diff --git a/reaper.cpp b/reaper.cpp
new file mode 100644
index 0000000..2c9e737
--- /dev/null
+++ b/reaper.cpp
@@ -0,0 +1,253 @@
+/*
+ *  Copyright 2021 Google, Inc
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#define LOG_TAG "lowmemorykiller"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <log/log.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/epoll.h>
+#include <sys/pidfd.h>
+#include <sys/resource.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <processgroup/processgroup.h>
+#include <system/thread_defs.h>
+
+#include "reaper.h"
+
+#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
+#define THREAD_POOL_SIZE 2
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease 448
+#endif
+
+static int process_mrelease(int pidfd, unsigned int flags) {
+    return syscall(__NR_process_mrelease, pidfd, flags);
+}
+
+static inline long get_time_diff_ms(struct timespec *from,
+                                    struct timespec *to) {
+    return (to->tv_sec - from->tv_sec) * (long)MS_PER_SEC +
+           (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
+}
+
+static void* reaper_main(void* param) {  // reaper thread entry; param is the owning Reaper
+    Reaper *reaper = static_cast<Reaper*>(param);
+    struct timespec start_tm, end_tm;
+    struct Reaper::target_proc target;
+    pid_t tid = gettid();
+
+    // Ensure the thread does not use little cores
+    if (!SetTaskProfiles(tid, {"CPUSET_SP_FOREGROUND"}, true)) {
+        ALOGE("Failed to assign cpuset to the reaper thread");
+    }
+
+    for (;;) {
+        target = reaper->dequeue_request();
+
+        if (reaper->debug_enabled()) {
+            clock_gettime(CLOCK_MONOTONIC_COARSE, &start_tm);
+        }
+        // Kill, then reap; failure logs identify the victim by pid rather than by fd number
+        if (pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0)) {
+            // Inform the main thread about failure to kill
+            reaper->notify_kill_failure(target.pid);
+            goto done;
+        }
+        if (process_mrelease(target.pidfd, 0)) {
+            ALOGE("process_mrelease %d failed: %s", target.pid, strerror(errno));
+            goto done;
+        }
+        if (reaper->debug_enabled()) {
+            clock_gettime(CLOCK_MONOTONIC_COARSE, &end_tm);
+            ALOGI("Process %d was reaped in %ldms", target.pid,
+                  get_time_diff_ms(&start_tm, &end_tm));
+        }
+done:
+        close(target.pidfd);
+        reaper->request_complete();
+    }
+
+    return NULL;
+}
+
+bool Reaper::is_reaping_supported() {
+    static enum {
+        UNKNOWN,
+        SUPPORTED,
+        UNSUPPORTED
+    } reap_support = UNKNOWN;
+
+    if (reap_support == UNKNOWN) {
+        if (process_mrelease(-1, 0) && errno == ENOSYS) {
+            reap_support = UNSUPPORTED;
+        } else {
+            reap_support = SUPPORTED;
+        }
+    }
+    return reap_support == SUPPORTED;
+}
+
+bool Reaper::init(int comm_fd) {  // false if already initialized or no thread could start
+    char name[16];
+
+    if (thread_cnt_ > 0) {
+        // init should not be called multiple times
+        return false;
+    }
+    // pthread_* calls return the error number directly and leave errno untouched
+    thread_pool_ = new pthread_t[THREAD_POOL_SIZE];
+    for (int i = 0; i < THREAD_POOL_SIZE; i++) {
+        if (int err = pthread_create(&thread_pool_[thread_cnt_], NULL, reaper_main, this)) {
+            ALOGE("pthread_create failed: %s", strerror(err));
+            continue;
+        }
+        snprintf(name, sizeof(name), "lmkd_reaper%d", thread_cnt_);
+        if (int err = pthread_setname_np(thread_pool_[thread_cnt_], name)) {
+            ALOGW("pthread_setname_np failed: %s", strerror(err));
+        }
+        thread_cnt_++;
+    }
+
+    if (!thread_cnt_) {
+        delete[] thread_pool_;
+        return false;
+    }
+
+    queue_.reserve(thread_cnt_);
+    comm_fd_ = comm_fd;
+    return true;
+}
+
+static void set_process_group_and_prio(uid_t uid, int pid, const std::vector<std::string>& profiles,
+                                       int prio) {
+    DIR* d;
+    char proc_path[PATH_MAX];
+    struct dirent* de;
+
+    if (!SetProcessProfilesCached(uid, pid, profiles)) {
+        ALOGW("Failed to set task profiles for the process (%d) being killed", pid);
+    }
+
+    snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
+    if (!(d = opendir(proc_path))) {
+        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
+              pid);
+        return;
+    }
+
+    while ((de = readdir(d))) {
+        int t_pid;
+
+        if (de->d_name[0] == '.') continue;
+        t_pid = atoi(de->d_name);
+
+        if (!t_pid) {
+            ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
+            continue;
+        }
+
+        if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
+            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
+        }
+    }
+    closedir(d);
+}
+
+bool Reaper::async_kill(const struct target_proc& target) {  // true once the kill is queued
+    if (target.pidfd == -1 || !thread_cnt_) {
+        return false;
+    }
+
+    mutex_.lock();
+    if (active_requests_ >= thread_cnt_) {
+        mutex_.unlock();
+        return false;
+    }
+    // Duplicate pidfd instead of reusing the original one to avoid synchronization and refcounting
+    // when both reaper and main threads are using or closing the pidfd
+    int pidfd = dup(target.pidfd);
+    if (pidfd < 0) {
+        ALOGE("dup failed: %s", strerror(errno));
+        mutex_.unlock();
+        return false;  // nothing was queued; Reaper::kill() then kills synchronously
+    }
+    active_requests_++;
+    queue_.push_back({ pidfd, target.pid, target.uid });
+    // Wake up a reaper thread
+    cond_.notify_one();
+    mutex_.unlock();
+
+    set_process_group_and_prio(target.uid, target.pid,
+                               {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
+                               ANDROID_PRIORITY_HIGHEST);
+    return true;
+}
+
+int Reaper::kill(const struct target_proc& target, bool synchronous) {
+    /* CAP_KILL required */
+    if (target.pidfd < 0) {
+        return ::kill(target.pid, SIGKILL);
+    }
+
+    if (!synchronous && async_kill(target)) {
+        // we assume the kill will be successful and if it fails we will be notified
+        return 0;
+    }
+
+    int result = pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0);
+    if (result) {
+        return result;
+    }
+
+    return is_reaping_supported() ? process_mrelease(target.pidfd, 0) : 0;
+}
+
+Reaper::target_proc Reaper::dequeue_request() {
+    struct target_proc target;
+    std::unique_lock<std::mutex> lock(mutex_);
+
+    while (queue_.empty()) {
+        cond_.wait(lock);
+    }
+    target = queue_.back();
+    queue_.pop_back();
+
+    return target;
+}
+
+void Reaper::request_complete() {
+    std::scoped_lock<std::mutex> lock(mutex_);
+    active_requests_--;
+}
+
+void Reaper::notify_kill_failure(int pid) {
+    std::scoped_lock<std::mutex> lock(mutex_);
+
+    ALOGE("Failed to kill process %d", pid);
+    if (TEMP_FAILURE_RETRY(write(comm_fd_, &pid, sizeof(pid))) != sizeof(pid)) {
+        ALOGE("thread communication write failed: %s", strerror(errno));
+    }
+}
diff --git a/reaper.h b/reaper.h
new file mode 100644
index 0000000..e20d892
--- /dev/null
+++ b/reaper.h
@@ -0,0 +1,60 @@
+/*
+ *  Copyright 2021 Google, Inc
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <vector>
+
+class Reaper {
+public:
+    struct target_proc {
+        int pidfd;
+        int pid;
+        uid_t uid;
+    };
+private:
+    // mutex_ and cond_ are used to wakeup the reaper thread.
+    std::mutex mutex_;
+    std::condition_variable cond_;
+    // mutex_ protects queue_ and active_requests_ access.
+    std::vector<struct target_proc> queue_;
+    int active_requests_;
+    // write side of the pipe to communicate kill failures with the main thread
+    int comm_fd_;
+    int thread_cnt_;
+    pthread_t* thread_pool_;
+    bool debug_enabled_;
+
+    bool async_kill(const struct target_proc& target);
+public:
+    Reaper() : active_requests_(0), thread_cnt_(0), debug_enabled_(false) {}
+
+    static bool is_reaping_supported();
+
+    bool init(int comm_fd);
+    int thread_cnt() const { return thread_cnt_; }
+    void enable_debug(bool enable) { debug_enabled_ = enable; }
+    bool debug_enabled() const { return debug_enabled_; }
+
+    // return 0 on success or error code returned by the syscall
+    int kill(const struct target_proc& target, bool synchronous);
+    // below members are used only by reaper_main
+    target_proc dequeue_request();
+    void request_complete();
+    void notify_kill_failure(int pid);
+};
diff --git a/statslog.cpp b/statslog.cpp
index 6568f73..26a6d86 100644
--- a/statslog.cpp
+++ b/statslog.cpp
@@ -30,6 +30,10 @@
 #include <time.h>
 #include <unistd.h>
 
+#include <string>
+
+#include <processgroup/processgroup.h>
+
 #ifdef LMKD_LOG_STATS
 
 #define STRINGIFY(x) STRINGIFY_INTERNAL(x)
@@ -85,18 +89,20 @@
         mem_st->swap_in_bytes = value;
 }
 
-static int memory_stat_from_cgroup(struct memory_stat* mem_st, int pid, uid_t uid) {
-    FILE *fp;
-    char buf[PATH_MAX];
+static int memory_stat_from_cgroup(struct memory_stat* mem_st, int pid, uid_t uid __unused) {
+    std::string path;
+    if (!CgroupGetAttributePathForTask("MemStats", pid, &path)) {
+        ALOGE("Querying MemStats path failed");
+        return -1;
+    }
 
-    snprintf(buf, sizeof(buf), MEMCG_PROCESS_MEMORY_STAT_PATH, uid, pid);
-
-    fp = fopen(buf, "r");
+    FILE* fp = fopen(path.c_str(), "r");
 
     if (fp == NULL) {
         return -1;
     }
 
+    char buf[PAGE_SIZE];
     while (fgets(buf, PAGE_SIZE, fp) != NULL) {
         memory_stat_parse_line(buf, mem_st);
     }
diff --git a/statslog.h b/statslog.h
index 89e4d2e..e3f8b72 100644
--- a/statslog.h
+++ b/statslog.h
@@ -92,7 +92,6 @@
 
 #ifdef LMKD_LOG_STATS
 
-#define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%u/pid_%d/memory.stat"
 #define PROC_STAT_FILE_PATH "/proc/%d/stat"
 #define PROC_STAT_BUFFER_SIZE 1024
 #define BYTES_IN_KILOBYTE 1024
diff --git a/tests/Android.bp b/tests/Android.bp
index dfbe0c7..effdac7 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -23,6 +23,7 @@
         "libbase",
         "liblog",
         "libcutils",
+        "libprocessgroup",
     ],
 
     static_libs: [
@@ -43,3 +44,34 @@
 
     compile_multilib: "first",
 }
+
+cc_test {
+    name: "lmkd_tests",
+    test_suites: ["device-tests"],
+    require_root: true,
+
+    shared_libs: [
+        "libbase",
+        "liblog",
+        "libcutils",
+        "libprocessgroup",
+    ],
+
+    static_libs: [
+        "liblmkd_utils",
+    ],
+
+    target: {
+        android: {
+            srcs: ["lmkd_tests.cpp"],
+        },
+    },
+
+    cflags: [
+        "-Wall",
+        "-Wextra",
+        "-Werror",
+    ],
+
+    compile_multilib: "first",
+}
diff --git a/tests/TEST_MAPPING b/tests/TEST_MAPPING
new file mode 100644
index 0000000..7c2533b
--- /dev/null
+++ b/tests/TEST_MAPPING
@@ -0,0 +1,7 @@
+{
+  "presubmit-large": [
+    {
+      "name": "lmkd_tests"
+    }
+  ]
+}
diff --git a/tests/lmkd_tests.cpp b/tests/lmkd_tests.cpp
new file mode 100644
index 0000000..0676d85
--- /dev/null
+++ b/tests/lmkd_tests.cpp
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sstream>
+#include <string>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <cutils/properties.h>
+#include <gtest/gtest.h>
+#include <liblmkd_utils.h>
+#include <log/log_properties.h>
+#include <private/android_filesystem_config.h>
+
+using namespace android::base;
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease 448
+#endif
+
+#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
+
+#define LMKD_LOGCAT_MARKER "lowmemorykiller"
+#define LMKD_KILL_TEMPLATE "Kill \'[^']*\' \\\(%d\\)"
+#define LMKD_REAP_TEMPLATE "Process %d was reaped"
+
+#define LMKD_KILL_LINE_START LMKD_LOGCAT_MARKER ": Kill"
+#define LMKD_REAP_LINE_START LMKD_LOGCAT_MARKER ": Process"
+#define LMKD_REAP_TIME_TEMPLATE LMKD_LOGCAT_MARKER ": Process %d was reaped in %ldms"
+
+#define ONE_MB (1 << 20)
+
+// Test constant parameters
+#define OOM_ADJ_MAX 1000
+#define ALLOC_STEP (5 * ONE_MB)
+#define ALLOC_DELAY 200
+
+// used to create ptr aliasing and prevent compiler optimizing the access
+static volatile void* gptr;
+
+class LmkdTest : public ::testing::Test {
+  public:
+    virtual void SetUp() {
+        // test requirements
+        if (getuid() != static_cast<unsigned>(AID_ROOT)) {
+            GTEST_SKIP() << "Must be root, skipping test";
+        }
+
+        if (!__android_log_is_debuggable()) {
+            GTEST_SKIP() << "Must be userdebug build, skipping test";
+        }
+
+        if (!access(INKERNEL_MINFREE_PATH, W_OK)) {
+            GTEST_SKIP() << "Must not have kernel lowmemorykiller driver,"
+                         << " skipping test";
+        }
+
+        // should be able to turn on lmkd debug information
+        if (!property_get_bool("ro.lmk.debug", true)) {
+            GTEST_SKIP() << "Can't run with ro.lmk.debug property set to 'false', skipping test";
+        }
+
+        // setup lmkd connection
+        ASSERT_FALSE((sock = lmkd_connect()) < 0)
+                << "Failed to connect to lmkd process, err=" << strerror(errno);
+
+        // enable ro.lmk.debug if not already enabled
+        if (!property_get_bool("ro.lmk.debug", false)) {
+            EXPECT_EQ(property_set("ro.lmk.debug", "true"), 0);
+            EXPECT_EQ(lmkd_update_props(sock), UPDATE_PROPS_SUCCESS)
+                    << "Failed to reinitialize lmkd";
+        }
+
+        uid = getuid();
+    }
+
+    virtual void TearDown() {
+        // drop lmkd connection
+        close(sock);
+    }
+
+    void SetupChild(pid_t pid, int oomadj) {
+        struct lmk_procprio params;
+
+        params.pid = pid;
+        params.uid = uid;
+        params.oomadj = oomadj;
+        params.ptype = PROC_TYPE_APP;
+        ASSERT_FALSE(lmkd_register_proc(sock, &params) < 0)
+                << "Failed to communicate with lmkd, err=" << strerror(errno);
+        GTEST_LOG_(INFO) << "Target process " << pid << " launched";
+        if (property_get_bool("ro.config.low_ram", false)) {
+            ASSERT_FALSE(create_memcg(uid, pid) != 0)
+                    << "Target process " << pid << " failed to create a cgroup";
+        }
+    }
+
+    static std::string ExecCommand(const std::string& command) {
+        FILE* fp = popen(command.c_str(), "r");
+        std::string content;
+        ReadFdToString(fileno(fp), &content);
+        pclose(fp);
+        return content;
+    }
+
+    static std::string ReadLogcat(const std::string& tag, const std::string& regex) {
+        std::string cmd = "logcat -d -b all";
+        if (!tag.empty()) {
+            cmd += " -s \"" + tag + "\"";
+        }
+        if (!regex.empty()) {
+            cmd += " -e \"" + regex + "\"";
+        }
+        return ExecCommand(cmd);
+    }
+
+    static size_t ConsumeMemory(size_t total_size, size_t step_size, size_t step_delay) {
+        volatile void* ptr;
+        size_t allocated_size = 0;
+
+        while (allocated_size < total_size) {
+            ptr = mmap(NULL, step_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+            if (ptr != MAP_FAILED) {
+                // create ptr aliasing to prevent compiler optimizing the access
+                gptr = ptr;
+                // make data non-zero
+                memset((void*)ptr, (int)(allocated_size + 1), step_size);
+                allocated_size += step_size;
+            }
+            usleep(step_delay);
+        }
+        return allocated_size;
+    }
+
+    static bool ParseProcSize(const std::string& line, long& rss, long& swap) {
+        size_t pos = line.find("to free");
+        if (pos == std::string::npos) {
+            return false;
+        }
+        return sscanf(line.c_str() + pos, "to free %ldkB rss, %ldkB swap", &rss, &swap) == 2;
+    }
+
+    static bool ParseReapTime(const std::string& line, pid_t pid, long& reap_time) {
+        int reap_pid;
+        return sscanf(line.c_str(), LMKD_REAP_TIME_TEMPLATE, &reap_pid, &reap_time) == 2 &&
+               reap_pid == pid;
+    }
+
+  private:
+    int sock;
+    uid_t uid;
+};
+
+TEST_F(LmkdTest, TargetReaping) {
+    // test specific requirements
+    if (syscall(__NR_process_mrelease, -1, 0) && errno == ENOSYS) {
+        GTEST_SKIP() << "Must support process_mrelease syscall, skipping test";
+    }
+
+    // for a child to act as a target process
+    pid_t pid = fork();
+    ASSERT_FALSE(pid < 0) << "Failed to spawn a child process, err=" << strerror(errno);
+    if (pid != 0) {
+        // parent
+        waitpid(pid, NULL, 0);
+    } else {
+        // child
+        SetupChild(getpid(), OOM_ADJ_MAX);
+        // allocate memory until killed
+        ConsumeMemory((size_t)-1, ALLOC_STEP, ALLOC_DELAY);
+        // should not reach here, child should be killed by OOM
+        FAIL() << "Target process " << pid << " was not killed";
+    }
+
+    std::string regex =
+            StringPrintf("((" LMKD_KILL_TEMPLATE ")|(" LMKD_REAP_TEMPLATE "))", pid, pid);
+    std::string logcat_out = ReadLogcat(LMKD_LOGCAT_MARKER ":I", regex);
+
+    // find kill report
+    size_t line_start = logcat_out.find(LMKD_KILL_LINE_START);
+    ASSERT_TRUE(line_start != std::string::npos) << "Kill report is not found";
+    size_t line_end = logcat_out.find('\n', line_start);
+    std::string line = logcat_out.substr(
+            line_start, line_end == std::string::npos ? std::string::npos : line_end - line_start);
+    long rss, swap;
+    ASSERT_TRUE(ParseProcSize(line, rss, swap)) << "Kill report format is invalid";
+
+    // find reap duration report
+    line_start = logcat_out.find(LMKD_REAP_LINE_START, line_end);
+    ASSERT_TRUE(line_start != std::string::npos) << "Reaping time report is not found";
+    line_end = logcat_out.find('\n', line_start);
+    line = logcat_out.substr(
+            line_start, line_end == std::string::npos ? std::string::npos : line_end - line_start);
+    long reap_time;
+    ASSERT_TRUE(ParseReapTime(line, pid, reap_time) && reap_time > 0)
+            << "Reaping time report format is invalid";
+
+    double reclaim_speed = ((double)rss + swap) / reap_time;
+    GTEST_LOG_(INFO) << "Reclaim speed " << reclaim_speed << "kB/ms (" << rss << "kB rss + " << swap
+                     << "kB swap) / " << reap_time << "ms";
+}
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    InitLogging(argv, StderrLogger);
+    return RUN_ALL_TESTS();
+}
diff --git a/watchdog.cpp b/watchdog.cpp
new file mode 100644
index 0000000..b1e4a03
--- /dev/null
+++ b/watchdog.cpp
@@ -0,0 +1,123 @@
+/*
+ *  Copyright 2021 Google, Inc
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#define LOG_TAG "lowmemorykiller"
+
+#include <errno.h>
+#include <log/log.h>
+#include <string.h>
+
+#include <processgroup/processgroup.h>
+
+#include "watchdog.h"
+
+static void* watchdog_main(void* param) {  // watchdog thread entry; param is the Watchdog
+    Watchdog *watchdog = static_cast<Watchdog*>(param);
+    sigset_t sigset;
+    int signum;
+
+    // Ensure the thread does not use little cores
+    if (!SetTaskProfiles(gettid(), {"CPUSET_SP_FOREGROUND"}, true)) {
+        ALOGE("Failed to assign cpuset to the watchdog thread");
+    }
+
+    if (!watchdog->create_timer(sigset)) {
+        ALOGE("Watchdog timer creation failed!");
+        return NULL;
+    }
+
+    while (true) {
+        // sigwait() reports failure through its return value; it never returns -1 or sets errno
+        if (int err = sigwait(&sigset, &signum)) {
+            ALOGE("sigwait failed: %s", strerror(err));
+        }
+        watchdog->bite();
+    }
+
+    return NULL;
+}
+
+bool Watchdog::init() {  // spawns the watchdog thread; false if it could not be created
+    pthread_t thread;
+    // pthread_* calls return the error number directly and do not set errno
+    if (int err = pthread_create(&thread, NULL, watchdog_main, this)) {
+        ALOGE("pthread_create failed: %s", strerror(err));
+        return false;
+    }
+    if (int err = pthread_setname_np(thread, "lmkd_watchdog")) {
+        ALOGW("pthread_setname_np failed: %s", strerror(err));
+    }
+
+    return true;
+}
+
+bool Watchdog::start() {
+    // Start the timer and keep it active until it's disarmed
+    struct itimerspec new_timer;
+
+    if (!timer_created_) {
+        return false;
+    }
+
+    new_timer.it_value.tv_sec = timeout_;
+    new_timer.it_value.tv_nsec = 0;
+    new_timer.it_interval.tv_sec = timeout_;
+    new_timer.it_interval.tv_nsec = 0;
+
+    if (timer_settime(timer_, 0, &new_timer, NULL)) {
+        ALOGE("timer_settime failed: %s", strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+bool Watchdog::stop() {
+    struct itimerspec new_timer = {};
+
+    if (!timer_created_) {
+        return false;
+    }
+
+    if (timer_settime(timer_, 0, &new_timer, NULL)) {
+        ALOGE("timer_settime failed: %s", strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+bool Watchdog::create_timer(sigset_t &sigset) {
+    struct sigevent sevent;
+
+    sigemptyset(&sigset);
+    sigaddset(&sigset, SIGALRM);
+    if (sigprocmask(SIG_BLOCK, &sigset, NULL)) {
+        ALOGE("sigprocmask failed: %s", strerror(errno));
+        return false;
+    }
+
+    sevent.sigev_notify = SIGEV_THREAD_ID;
+    sevent.sigev_notify_thread_id = gettid();
+    sevent.sigev_signo = SIGALRM;
+    if (timer_create(CLOCK_MONOTONIC, &sevent, &timer_)) {
+        ALOGE("timer_create failed: %s", strerror(errno));
+        return false;
+    }
+
+    timer_created_ = true;
+    return true;
+}
diff --git a/watchdog.h b/watchdog.h
new file mode 100644
index 0000000..34cb602
--- /dev/null
+++ b/watchdog.h
@@ -0,0 +1,39 @@
+/*
+ *  Copyright 2021 Google, Inc
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <signal.h>
+#include <time.h>
+
+class Watchdog {
+private:
+    int timeout_;
+    timer_t timer_;
+    std::atomic<bool> timer_created_;
+    void (*callback_)();
+public:
+    Watchdog(int timeout, void (*callback)()) :
+        timeout_(timeout), timer_created_(false), callback_(callback) {}
+
+    bool init();
+    bool start();
+    bool stop();
+    // used by the watchdog_main
+    bool create_timer(sigset_t &sigset);
+    void bite() const { if (callback_) callback_(); }
+};