Snap for 8564071 from b05a63e6104fcaf221813ccf4740b059b7982e69 to mainline-tethering-release
Change-Id: I36fc31ded4324cc659a8c1ae28bd96a63fa7851d
diff --git a/Android.bp b/Android.bp
index 4f53ded..0481c96 100644
--- a/Android.bp
+++ b/Android.bp
@@ -12,7 +12,11 @@
cc_binary {
name: "lmkd",
- srcs: ["lmkd.cpp"],
+ srcs: [
+ "lmkd.cpp",
+ "reaper.cpp",
+ "watchdog.cpp",
+ ],
shared_libs: [
"libcutils",
"liblog",
@@ -36,6 +40,7 @@
init_rc: ["lmkd.rc"],
defaults: ["stats_defaults"],
logtags: ["event.logtags"],
+ afdo: true,
}
cc_library_static {
@@ -50,6 +55,7 @@
defaults: ["stats_defaults"],
shared_libs: [
"liblog",
+ "libprocessgroup",
],
}
@@ -59,6 +65,7 @@
recovery_available: true,
shared_libs: [
"libcutils",
+ "libprocessgroup",
],
export_include_dirs: ["include"],
cppflags: [
diff --git a/event.logtags b/event.logtags
index 5382b49..dcab73c 100644
--- a/event.logtags
+++ b/event.logtags
@@ -21,6 +21,7 @@
# 2: int64_t
# 3: string
# 4: list
+# 5: float
#
# The data unit is a number taken from the following list:
# 1: Number of objects
@@ -35,4 +36,4 @@
# TODO: generate ".java" and ".h" files with integer constants from this file.
# for killinfo logs
-10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1),(MsSinceEvent|1),(MsSincePrevWakeup|1),(WakeupsSinceEvent|1),(SkippedWakeups|1),(TaskSwapSize|1),(GPU|1)
+10195355 killinfo (Pid|1|5),(Uid|1|5),(OomAdj|1),(MinOomAdj|1),(TaskSize|1),(enum kill_reasons|1|5),(MemFree|1),(Cached|1),(SwapCached|1),(Buffers|1),(Shmem|1),(Unevictable|1),(SwapTotal|1),(SwapFree|1),(ActiveAnon|1),(InactiveAnon|1),(ActiveFile|1),(InactiveFile|1),(SReclaimable|1),(SUnreclaim|1),(KernelStack|1),(PageTables|1),(IonHeap|1),(IonHeapPool|1),(CmaFree|1),(MsSinceEvent|1),(MsSincePrevWakeup|1),(WakeupsSinceEvent|1),(SkippedWakeups|1),(TaskSwapSize|1),(GPU|1),(Thrashing|1),(MaxThrashing|1),(PsiMemSome|5),(PsiMemFull|5),(PsiIoSome|5),(PsiIoFull|5),(PsiCpuSome|5)
diff --git a/liblmkd_utils.cpp b/liblmkd_utils.cpp
index 55d7f62..e5e99de 100644
--- a/liblmkd_utils.cpp
+++ b/liblmkd_utils.cpp
@@ -22,8 +22,9 @@
#include <stdio.h>
#include <unistd.h>
-#include <liblmkd_utils.h>
#include <cutils/sockets.h>
+#include <liblmkd_utils.h>
+#include <processgroup/processgroup.h>
int lmkd_connect() {
return socket_local_client("lmkd",
@@ -78,34 +79,6 @@
}
int create_memcg(uid_t uid, pid_t pid) {
- char buf[256];
- int tasks_file;
- int written;
-
- snprintf(buf, sizeof(buf), "/dev/memcg/apps/uid_%u", uid);
- if (mkdir(buf, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) < 0 &&
- errno != EEXIST) {
- return -1;
- }
-
- snprintf(buf, sizeof(buf), "/dev/memcg/apps/uid_%u/pid_%u", uid, pid);
- if (mkdir(buf, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) < 0 &&
- errno != EEXIST) {
- return -1;
- }
-
- snprintf(buf, sizeof(buf), "/dev/memcg/apps/uid_%u/pid_%u/tasks", uid, pid);
- tasks_file = open(buf, O_WRONLY);
- if (tasks_file < 0) {
- return -2;
- }
- written = snprintf(buf, sizeof(buf), "%u", pid);
- if (__predict_false(written >= (int)sizeof(buf))) {
- written = sizeof(buf) - 1;
- }
- written = TEMP_FAILURE_RETRY(write(tasks_file, buf, written));
- close(tasks_file);
-
- return (written < 0) ? -3 : 0;
+ return createProcessGroup(uid, pid, true) == 0 ? 0 : -1;
}
diff --git a/libpsi/include/psi/psi.h b/libpsi/include/psi/psi.h
index cd49e8b..9162446 100644
--- a/libpsi/include/psi/psi.h
+++ b/libpsi/include/psi/psi.h
@@ -22,12 +22,29 @@
__BEGIN_DECLS
+#define PSI_PATH_MEMORY "/proc/pressure/memory"
+#define PSI_PATH_IO "/proc/pressure/io"
+#define PSI_PATH_CPU "/proc/pressure/cpu"
+
enum psi_stall_type {
PSI_SOME,
PSI_FULL,
PSI_TYPE_COUNT
};
+/* Values parsed from a single "some"/"full" line of a PSI pressure file. */
+struct psi_stats {
+ /* value of the "avg10=" field */
+ float avg10;
+ /* value of the "avg60=" field */
+ float avg60;
+ /* value of the "avg300=" field */
+ float avg300;
+ /* value of the "total=" field */
+ unsigned long total;
+};
+
+/*
+ * PSI statistics for the memory, io and cpu resources.
+ * Each array is indexed by enum psi_stall_type (PSI_SOME, PSI_FULL).
+ */
+struct psi_data {
+ /* stats for PSI_PATH_MEMORY */
+ struct psi_stats mem_stats[PSI_TYPE_COUNT];
+ /* stats for PSI_PATH_IO */
+ struct psi_stats io_stats[PSI_TYPE_COUNT];
+ /* stats for PSI_PATH_CPU */
+ struct psi_stats cpu_stats[PSI_TYPE_COUNT];
+};
+
/*
* Initializes psi monitor.
* stall_type, threshold_us and window_us are monitor parameters
@@ -63,6 +80,13 @@
*/
void destroy_psi_monitor(int fd);
+/*
+ * Parse one line of a psi file into stats[stall_type]. Expected file format is:
+ * some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ * full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+ *
+ * line is a single line of the file, stall_type selects both the expected
+ * leading keyword ("some" or "full") and the element of stats to fill.
+ * Returns 0 on success and -1 if line is NULL, does not match the format above
+ * or starts with a keyword other than the one expected for stall_type.
+ */
+int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]);
+
__END_DECLS
#endif // __ANDROID_PSI_H__
diff --git a/libpsi/psi.cpp b/libpsi/psi.cpp
index 89f07ed..54f9971 100644
--- a/libpsi/psi.cpp
+++ b/libpsi/psi.cpp
@@ -28,8 +28,6 @@
#include <stdio.h>
#include "psi/psi.h"
-#define PSI_MON_FILE_MEMORY "/proc/pressure/memory"
-
static const char* stall_type_name[] = {
"some",
"full",
@@ -41,7 +39,7 @@
int res;
char buf[256];
- fd = TEMP_FAILURE_RETRY(open(PSI_MON_FILE_MEMORY, O_WRONLY | O_CLOEXEC));
+ fd = TEMP_FAILURE_RETRY(open(PSI_PATH_MEMORY, O_WRONLY | O_CLOEXEC));
if (fd < 0) {
ALOGE("No kernel psi monitor support (errno=%d)", errno);
return -1;
@@ -61,7 +59,7 @@
if (res >= (ssize_t)sizeof(buf)) {
ALOGE("%s line overflow for psi stall type '%s'",
- PSI_MON_FILE_MEMORY, stall_type_name[stall_type]);
+ PSI_PATH_MEMORY, stall_type_name[stall_type]);
errno = EINVAL;
goto err;
}
@@ -69,7 +67,7 @@
res = TEMP_FAILURE_RETRY(write(fd, buf, strlen(buf) + 1));
if (res < 0) {
ALOGE("%s write failed for psi stall type '%s'; errno=%d",
- PSI_MON_FILE_MEMORY, stall_type_name[stall_type], errno);
+ PSI_PATH_MEMORY, stall_type_name[stall_type], errno);
goto err;
}
@@ -102,3 +100,17 @@
close(fd);
}
}
+
+/*
+ * Parse one line of a psi file ("<type> avg10=.. avg60=.. avg300=.. total=..") into
+ * stats[stall_type].
+ *
+ * The fields are parsed into a temporary and copied into the caller's array only after the
+ * whole line has been validated, so a failed call never leaves stats[stall_type] holding a
+ * half-parsed line or the values of a different stall type.
+ *
+ * Returns 0 on success; -1 if line is NULL, does not contain the five expected fields, or
+ * its leading type name differs from the name of stall_type.
+ */
+int parse_psi_line(char *line, enum psi_stall_type stall_type, struct psi_stats stats[]) {
+    /* room for "some"/"full" plus the terminating NUL; %4s below limits the read to match */
+    char type_name[5];
+    struct psi_stats parsed;
+
+    if (!line) {
+        return -1;
+    }
+    if (sscanf(line, "%4s avg10=%f avg60=%f avg300=%f total=%lu", type_name, &parsed.avg10,
+               &parsed.avg60, &parsed.avg300, &parsed.total) != 5) {
+        return -1;
+    }
+    if (strcmp(type_name, stall_type_name[stall_type]) != 0) {
+        return -1;
+    }
+
+    /* line fully validated: publish the result */
+    stats[stall_type] = parsed;
+    return 0;
+}
diff --git a/lmkd.cpp b/lmkd.cpp
index 68f9729..741e891 100644
--- a/lmkd.cpp
+++ b/lmkd.cpp
@@ -16,12 +16,10 @@
#define LOG_TAG "lowmemorykiller"
-#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
#include <pwd.h>
#include <sched.h>
-#include <signal.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
@@ -30,17 +28,17 @@
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/pidfd.h>
-#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
-#include <sys/time.h>
-#include <sys/types.h>
#include <time.h>
#include <unistd.h>
+#include <algorithm>
+#include <array>
+#include <shared_mutex>
+
#include <cutils/properties.h>
-#include <cutils/sched_policy.h>
#include <cutils/sockets.h>
#include <liblmkd_utils.h>
#include <lmkd.h>
@@ -48,10 +46,12 @@
#include <log/log_event_list.h>
#include <log/log_time.h>
#include <private/android_filesystem_config.h>
+#include <processgroup/processgroup.h>
#include <psi/psi.h>
-#include <system/thread_defs.h>
+#include "reaper.h"
#include "statslog.h"
+#include "watchdog.h"
#define BPF_FD_JUST_USE_INT
#include "BpfSyscallWrappers.h"
@@ -65,13 +65,20 @@
#define ATRACE_TAG ATRACE_TAG_ALWAYS
#include <cutils/trace.h>
-#define TRACE_KILL_START(pid) ATRACE_INT(__FUNCTION__, pid);
-#define TRACE_KILL_END() ATRACE_INT(__FUNCTION__, 0);
+// Marks the beginning of a kill in the trace: publishes pid under the "kill_one_process"
+// counter name, then opens a trace section named desc. Must be paired with trace_kill_end().
+static inline void trace_kill_start(int pid, const char *desc) {
+ ATRACE_INT("kill_one_process", pid);
+ ATRACE_BEGIN(desc);
+}
+
+// Marks the end of a kill in the trace: closes the section opened by trace_kill_start() and
+// resets the "kill_one_process" counter to 0.
+static inline void trace_kill_end() {
+ ATRACE_END();
+ ATRACE_INT("kill_one_process", 0);
+}
#else /* LMKD_TRACE_KILLS */
-#define TRACE_KILL_START(pid) ((void)(pid))
-#define TRACE_KILL_END() ((void)0)
+// No-op variants used when LMKD_TRACE_KILLS is not defined, so call sites need no #ifdef.
+static inline void trace_kill_start(int, const char *) {}
+static inline void trace_kill_end() {}
#endif /* LMKD_TRACE_KILLS */
@@ -79,9 +86,6 @@
#define __unused __attribute__((__unused__))
#endif
-#define MEMCG_SYSFS_PATH "/dev/memcg/"
-#define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
-#define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"
#define ZONEINFO_PATH "/proc/zoneinfo"
#define MEMINFO_PATH "/proc/meminfo"
#define VMSTAT_PATH "/proc/vmstat"
@@ -99,7 +103,6 @@
#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
#define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define EIGHT_MEGA (1 << 23)
#define TARGET_UPDATE_MIN_INTERVAL_MS 1000
@@ -136,9 +139,6 @@
/* Polling period after PSI signal when pressure is low */
#define PSI_POLL_PERIOD_LONG_MS 100
-#define min(a, b) (((a) < (b)) ? (a) : (b))
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-
#define FAIL_REPORT_RLIMIT_MS 1000
/*
@@ -160,6 +160,8 @@
#define LMKD_REINIT_PROP "lmkd.reinit"
+#define WATCHDOG_TIMEOUT_SEC 2
+
/* default to old in-kernel interface if no memory pressure events */
static bool use_inkernel_interface = true;
static bool has_inkernel_module;
@@ -212,6 +214,7 @@
static int thrashing_critical_pct;
static int swap_util_max;
static int64_t filecache_min_kb;
+static int64_t stall_limit_critical;
static bool use_psi_monitors = false;
static int kpoll_fd;
static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
@@ -221,6 +224,8 @@
};
static android_log_context ctx;
+static Reaper reaper;
+static int reaper_comm_fd[2];
enum polling_update {
POLLING_DO_NOT_CHANGE,
@@ -270,9 +275,9 @@
/*
* 1 ctrl listen socket, 3 ctrl data socket, 3 memory pressure levels,
- * 1 lmk events + 1 fd to wait for process death
+ * 1 lmk events + 1 fd to wait for process death + 1 fd to receive kill failure notifications
*/
-#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1)
+#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT + 1 + 1 + 1)
static int epollfd;
static int maxevents;
@@ -280,8 +285,8 @@
#define OOM_SCORE_ADJ_MIN (-1000)
#define OOM_SCORE_ADJ_MAX 1000
-static int lowmem_adj[MAX_TARGETS];
-static int lowmem_minfree[MAX_TARGETS];
+static std::array<int, MAX_TARGETS> lowmem_adj;
+static std::array<int, MAX_TARGETS> lowmem_minfree;
static int lowmem_targets_size;
/* Fields to parse in /proc/zoneinfo */
@@ -520,6 +525,11 @@
#define ADJTOSLOT(adj) ((adj) + -OOM_SCORE_ADJ_MIN)
#define ADJTOSLOT_COUNT (ADJTOSLOT(OOM_SCORE_ADJ_MAX) + 1)
+
+// protects procadjslot_list from concurrent access
+static std::shared_mutex adjslot_list_lock;
+// procadjslot_list should be modified only from the main thread while exclusively holding
+// adjslot_list_lock. Readers from non-main threads should hold adjslot_list_lock shared lock.
static struct adjslot_list procadjslot_list[ADJTOSLOT_COUNT];
#define MAX_DISTINCT_OOM_ADJ 32
@@ -542,7 +552,7 @@
static void destroy_monitors();
static int clamp(int low, int high, int value) {
- return max(min(value, high), low);
+ return std::max(std::min(value, high), low);
}
static bool parse_int64(const char* str, int64_t* ret) {
@@ -823,7 +833,7 @@
while (1) {
char rd_buf[256];
- int bytes_read = TEMP_FAILURE_RETRY(pread(poll_fd, (void*)rd_buf, sizeof(rd_buf), 0));
+ int bytes_read = TEMP_FAILURE_RETRY(pread(poll_fd, (void*)rd_buf, sizeof(rd_buf) - 1, 0));
if (bytes_read <= 0) break;
rd_buf[bytes_read] = '\0';
@@ -909,13 +919,18 @@
return asl == head ? NULL : asl;
}
+// Should be modified only from the main thread.
static void proc_slot(struct proc *procp) {
int adjslot = ADJTOSLOT(procp->oomadj);
+ std::scoped_lock lock(adjslot_list_lock);
adjslot_insert(&procadjslot_list[adjslot], &procp->asl);
}
+// Should be modified only from the main thread.
static void proc_unslot(struct proc *procp) {
+ std::scoped_lock lock(adjslot_list_lock);
+
adjslot_remove(&procp->asl);
}
@@ -1165,9 +1180,12 @@
soft_limit_mult = 64;
}
- snprintf(path, sizeof(path), MEMCG_SYSFS_PATH
- "apps/uid_%d/pid_%d/memory.soft_limit_in_bytes",
- params.uid, params.pid);
+ std::string path;
+ if (!CgroupGetAttributePathForTask("MemSoftLimit", params.pid, &path)) {
+ ALOGE("Querying MemSoftLimit path failed");
+ return;
+ }
+
snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);
/*
@@ -1177,7 +1195,7 @@
is_system_server = (params.oomadj == SYSTEM_ADJ &&
(pwdrec = getpwnam("system")) != NULL &&
params.uid == pwdrec->pw_uid);
- writefilestring(path, val, !is_system_server);
+ writefilestring(path.c_str(), val, !is_system_server);
}
procp = pid_lookup(params.pid);
@@ -1359,8 +1377,9 @@
static struct timespec last_req_tm;
struct timespec curr_tm;
- if (ntargets < 1 || ntargets > (int)ARRAY_SIZE(lowmem_adj))
+ if (ntargets < 1 || ntargets > (int)lowmem_adj.size()) {
return;
+ }
/*
* Ratelimit minfree updates to once per TARGET_UPDATE_MIN_INTERVAL_MS
@@ -1452,8 +1471,9 @@
switch(cmd) {
case LMK_TARGET:
targets = nargs / 2;
- if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
+ if (nargs & 0x1 || targets > (int)lowmem_adj.size()) {
goto wronglen;
+ }
cmd_target(targets, packet);
break;
case LMK_PROCPRIO:
@@ -1897,6 +1917,53 @@
return 0;
}
+/*
+ * Re-reads the psi file described by file_data and parses its first line as the "some"
+ * stats and, when full is true, its second line as the "full" stats.
+ * Returns 0 on success, -1 if reread_file() yields no buffer or a line fails to parse.
+ */
+static int psi_parse(struct reread_data *file_data, struct psi_stats stats[], bool full) {
+    char *tok_state;
+    char *contents = reread_file(file_data);
+
+    if (contents == NULL) {
+        return -1;
+    }
+
+    /* first line: "some ..." */
+    char *some_line = strtok_r(contents, "\n", &tok_state);
+    if (parse_psi_line(some_line, PSI_SOME, stats) != 0) {
+        return -1;
+    }
+    if (!full) {
+        return 0;
+    }
+
+    /* second line: "full ..." */
+    char *full_line = strtok_r(NULL, "\n", &tok_state);
+    return parse_psi_line(full_line, PSI_FULL, stats) != 0 ? -1 : 0;
+}
+
+/* Parses both the "some" and "full" lines of PSI_PATH_MEMORY into psi_data->mem_stats. */
+static int psi_parse_mem(struct psi_data *psi_data) {
+    /* static so the descriptor state kept in reread_data persists between calls */
+    static struct reread_data mem_file = {
+        .filename = PSI_PATH_MEMORY,
+        .fd = -1,
+    };
+    const bool parse_full_line = true;
+
+    return psi_parse(&mem_file, psi_data->mem_stats, parse_full_line);
+}
+
+/* Parses both the "some" and "full" lines of PSI_PATH_IO into psi_data->io_stats. */
+static int psi_parse_io(struct psi_data *psi_data) {
+    /* static so the descriptor state kept in reread_data persists between calls */
+    static struct reread_data io_file = {
+        .filename = PSI_PATH_IO,
+        .fd = -1,
+    };
+    const bool parse_full_line = true;
+
+    return psi_parse(&io_file, psi_data->io_stats, parse_full_line);
+}
+
+/*
+ * Parses only the "some" line of PSI_PATH_CPU into psi_data->cpu_stats[PSI_SOME];
+ * cpu_stats[PSI_FULL] is not filled in by this function.
+ */
+static int psi_parse_cpu(struct psi_data *psi_data) {
+    /* static so the descriptor state kept in reread_data persists between calls */
+    static struct reread_data cpu_file = {
+        .filename = PSI_PATH_CPU,
+        .fd = -1,
+    };
+    const bool parse_full_line = false;
+
+    return psi_parse(&cpu_file, psi_data->cpu_stats, parse_full_line);
+}
+
enum wakeup_reason {
Event,
Polling
@@ -1932,38 +1999,96 @@
}
}
+/* Describes why a kill was requested; passed down to kill_one_process() and killinfo_log(). */
+struct kill_info {
+ /* reason code written to the killinfo event log */
+ enum kill_reasons kill_reason;
+ /* human-readable reason printed in the "Kill ..." log message */
+ const char *kill_desc;
+ /* thrashing value at the time of the kill, logged as Thrashing */
+ int thrashing;
+ /* maximum thrashing value observed, logged as MaxThrashing */
+ int max_thrashing;
+};
+
static void killinfo_log(struct proc* procp, int min_oom_score, int rss_kb,
- int swap_kb, int kill_reason, union meminfo *mi,
- struct wakeup_info *wi, struct timespec *tm) {
+ int swap_kb, struct kill_info *ki, union meminfo *mi,
+ struct wakeup_info *wi, struct timespec *tm, struct psi_data *pd) {
/* log process information */
android_log_write_int32(ctx, procp->pid);
android_log_write_int32(ctx, procp->uid);
android_log_write_int32(ctx, procp->oomadj);
android_log_write_int32(ctx, min_oom_score);
- android_log_write_int32(ctx, (int32_t)min(rss_kb, INT32_MAX));
- android_log_write_int32(ctx, kill_reason);
+ android_log_write_int32(ctx, std::min(rss_kb, (int)INT32_MAX));
+ android_log_write_int32(ctx, ki ? ki->kill_reason : NONE);
/* log meminfo fields */
for (int field_idx = 0; field_idx < MI_FIELD_COUNT; field_idx++) {
- android_log_write_int32(ctx, (int32_t)min(mi->arr[field_idx] * page_k, INT32_MAX));
+ android_log_write_int32(ctx,
+ mi ? std::min(mi->arr[field_idx] * page_k, (int64_t)INT32_MAX) : 0);
}
/* log lmkd wakeup information */
- android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->last_event_tm, tm));
- android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->prev_wakeup_tm, tm));
- android_log_write_int32(ctx, wi->wakeups_since_event);
- android_log_write_int32(ctx, wi->skipped_wakeups);
- android_log_write_int32(ctx, (int32_t)min(swap_kb, INT32_MAX));
- android_log_write_int32(ctx, (int32_t)mi->field.total_gpu_kb);
+ if (wi) {
+ android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->last_event_tm, tm));
+ android_log_write_int32(ctx, (int32_t)get_time_diff_ms(&wi->prev_wakeup_tm, tm));
+ android_log_write_int32(ctx, wi->wakeups_since_event);
+ android_log_write_int32(ctx, wi->skipped_wakeups);
+ } else {
+ android_log_write_int32(ctx, 0);
+ android_log_write_int32(ctx, 0);
+ android_log_write_int32(ctx, 0);
+ android_log_write_int32(ctx, 0);
+ }
+
+ android_log_write_int32(ctx, std::min(swap_kb, (int)INT32_MAX));
+ android_log_write_int32(ctx, mi ? (int32_t)mi->field.total_gpu_kb : 0);
+ if (ki) {
+ android_log_write_int32(ctx, ki->thrashing);
+ android_log_write_int32(ctx, ki->max_thrashing);
+ } else {
+ android_log_write_int32(ctx, 0);
+ android_log_write_int32(ctx, 0);
+ }
+
+ if (pd) {
+ android_log_write_float32(ctx, pd->mem_stats[PSI_SOME].avg10);
+ android_log_write_float32(ctx, pd->mem_stats[PSI_FULL].avg10);
+ android_log_write_float32(ctx, pd->io_stats[PSI_SOME].avg10);
+ android_log_write_float32(ctx, pd->io_stats[PSI_FULL].avg10);
+ android_log_write_float32(ctx, pd->cpu_stats[PSI_SOME].avg10);
+ } else {
+ for (int i = 0; i < 5; i++) {
+ android_log_write_float32(ctx, 0);
+ }
+ }
android_log_write_list(ctx, LOG_ID_EVENTS);
android_log_reset(ctx);
}
-static struct proc *proc_adj_lru(int oomadj) {
+// Returns the list anchor for the given oomadj slot. The anchor is not a real process record
+// and must only be used for comparison against entries of that list.
+// Take the adjslot_list_lock read lock before calling this from a non-main thread.
+static struct proc *proc_adj_head(int oomadj) {
+    struct adjslot_list *anchor = &procadjslot_list[ADJTOSLOT(oomadj)];
+
+    return (struct proc *)anchor;
+}
+
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
+static struct proc *proc_adj_tail(int oomadj) {
return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
}
+// Walks the oomadj slot list backwards from its tail looking for the process with the given
+// pid and returns the entry that precedes it (its ->prev). The returned entry can be the list
+// anchor itself, so callers should compare it against proc_adj_head(). Returns NULL when pid
+// is not in the list, which includes the case of an empty list.
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
+static struct proc *proc_adj_prev(int oomadj, int pid) {
+    struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)];
+
+    // adjslot_tail() reports an empty list as NULL (callers of proc_adj_tail() check for it),
+    // so stop on NULL as well as on wrapping around to the anchor.
+    for (struct adjslot_list *curr = adjslot_tail(head); curr != NULL && curr != head;
+         curr = curr->prev) {
+        if (((struct proc *)curr)->pid == pid) {
+            return (struct proc *)curr->prev;
+        }
+    }
+
+    return NULL;
+}
+
+// When called from a non-main thread, adjslot_list_lock read lock should be taken.
static struct proc *proc_get_heaviest(int oomadj) {
struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)];
struct adjslot_list *curr = head->next;
@@ -1987,41 +2112,55 @@
return maxprocp;
}
-static void set_process_group_and_prio(int pid, SchedPolicy sp, int prio) {
- DIR* d;
- char proc_path[PATH_MAX];
- struct dirent* de;
+static bool find_victim(int oom_score, int prev_pid, struct proc &target_proc) {
+ struct proc *procp;
+ std::shared_lock lock(adjslot_list_lock);
- snprintf(proc_path, sizeof(proc_path), "/proc/%d/task", pid);
- if (!(d = opendir(proc_path))) {
- ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", proc_path, errno,
- pid);
- return;
- }
-
- while ((de = readdir(d))) {
- int t_pid;
-
- if (de->d_name[0] == '.') continue;
- t_pid = atoi(de->d_name);
-
- if (!t_pid) {
- ALOGW("Failed to get t_pid for '%s' of pid(%d)", de->d_name, pid);
- continue;
- }
-
- if (setpriority(PRIO_PROCESS, t_pid, prio) && errno != ESRCH) {
- ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", t_pid, errno);
- }
-
- if (set_cpuset_policy(t_pid, sp)) {
- ALOGW("Failed to set_cpuset_policy on pid(%d) t_pid(%d) to %d", pid, t_pid, (int)sp);
- continue;
+ if (!prev_pid) {
+ procp = proc_adj_tail(oom_score);
+ } else {
+ procp = proc_adj_prev(oom_score, prev_pid);
+ if (!procp) {
+ // pid was removed, restart at the tail
+ procp = proc_adj_tail(oom_score);
}
}
- closedir(d);
+
+ // the list is empty at this oom_score or we looped through it
+ if (!procp || procp == proc_adj_head(oom_score)) {
+ return false;
+ }
+
+ // make a copy because original might be destroyed after adjslot_list_lock is released
+ target_proc = *procp;
+
+ return true;
}
+// Invoked by the watchdog when it times out. Scans the oom_score_adj slots from
+// OOM_SCORE_ADJ_MAX down to 0 and asks the reaper to kill candidates one at a time until a
+// kill succeeds; a candidate that could not be killed is skipped by resuming the search from
+// its pid. The successful kill is recorded in the killinfo log with no memory/wakeup/psi data.
+static void watchdog_callback() {
+    int last_failed_pid = 0;
+
+    ALOGW("lmkd watchdog timed out!");
+
+    int score = OOM_SCORE_ADJ_MAX;
+    while (score >= 0) {
+        struct proc victim;
+
+        if (!find_victim(score, last_failed_pid, victim)) {
+            // nothing (left) at this score, move on to the next lower one
+            score--;
+            last_failed_pid = 0;
+            continue;
+        }
+
+        if (reaper.kill({ victim.pidfd, victim.pid, victim.uid }, true) != 0) {
+            // kill failed, try whoever comes before this process in the same slot
+            last_failed_pid = victim.pid;
+            continue;
+        }
+
+        ALOGW("lmkd watchdog killed process %d, oom_score_adj %d", victim.pid, score);
+        killinfo_log(&victim, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+        break;
+    }
+}
+
+static Watchdog watchdog(WATCHDOG_TIMEOUT_SEC, watchdog_callback);
+
static bool is_kill_pending(void) {
char buf[24];
@@ -2092,6 +2231,19 @@
poll_params->update = POLLING_RESUME;
}
+// epoll handler for the read end of the reaper communication pipe. Consumes one pid from the
+// pipe, stops waiting for the pending kill and asks the main loop to resume polling.
+static void kill_fail_handler(int data __unused, uint32_t events __unused,
+                              struct polling_params *poll_params) {
+    int failed_pid;
+
+    // Draining the pipe lets subsequent epoll_wait calls sleep until the next notification.
+    ssize_t nread =
+            TEMP_FAILURE_RETRY(read(reaper_comm_fd[0], &failed_pid, sizeof(failed_pid)));
+    if (nread != sizeof(failed_pid)) {
+        ALOGE("thread communication read failed: %s", strerror(errno));
+    }
+
+    stop_wait_for_proc_kill(false);
+    poll_params->update = POLLING_RESUME;
+}
+
static void start_wait_for_proc_kill(int pid_or_fd) {
static struct event_handler_info kill_done_hinfo = { 0, kill_done_handler };
struct epoll_event epev;
@@ -2120,21 +2272,15 @@
maxevents++;
}
-struct kill_info {
- enum kill_reasons kill_reason;
- const char *kill_desc;
- int thrashing;
- int max_thrashing;
-};
-
/* Kill one process specified by procp. Returns the size (in pages) of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score, struct kill_info *ki,
- union meminfo *mi, struct wakeup_info *wi, struct timespec *tm) {
+ union meminfo *mi, struct wakeup_info *wi, struct timespec *tm,
+ struct psi_data *pd) {
int pid = procp->pid;
int pidfd = procp->pidfd;
uid_t uid = procp->uid;
char *taskname;
- int r;
+ int kill_result;
int result = -1;
struct memory_stat *mem_st;
struct kill_stat kill_st;
@@ -2142,6 +2288,7 @@
int64_t rss_kb;
int64_t swap_kb;
char buf[PAGE_SIZE];
+ char desc[LINE_MAX];
if (!read_proc_status(pid, buf, sizeof(buf))) {
goto out;
@@ -2170,28 +2317,23 @@
mem_st = stats_read_memory_stat(per_app_memcg, pid, uid, rss_kb * 1024, swap_kb * 1024);
- TRACE_KILL_START(pid);
+ snprintf(desc, sizeof(desc), "lmk,%d,%d,%d,%d,%d", pid, ki ? (int)ki->kill_reason : -1,
+ procp->oomadj, min_oom_score, ki ? ki->max_thrashing : -1);
- /* CAP_KILL required */
- if (pidfd < 0) {
- start_wait_for_proc_kill(pid);
- r = kill(pid, SIGKILL);
- } else {
- start_wait_for_proc_kill(pidfd);
- r = pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
- }
+ trace_kill_start(pid, desc);
- TRACE_KILL_END();
+ start_wait_for_proc_kill(pidfd < 0 ? pid : pidfd);
+ kill_result = reaper.kill({ pidfd, pid, uid }, false);
- if (r) {
+ trace_kill_end();
+
+ if (kill_result) {
stop_wait_for_proc_kill(false);
ALOGE("kill(%d): errno=%d", pid, errno);
/* Delete process record even when we fail to kill so that we don't get stuck on it */
goto out;
}
- set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);
-
last_kill_tm = *tm;
inc_killcnt(procp->oomadj);
@@ -2200,7 +2342,6 @@
kill_st.kill_reason = ki->kill_reason;
kill_st.thrashing = ki->thrashing;
kill_st.max_thrashing = ki->max_thrashing;
- killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki->kill_reason, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kB swap; reason: %s", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb,
ki->kill_desc);
@@ -2208,10 +2349,10 @@
kill_st.kill_reason = NONE;
kill_st.thrashing = 0;
kill_st.max_thrashing = 0;
- killinfo_log(procp, min_oom_score, rss_kb, swap_kb, NONE, mi, wi, tm);
ALOGI("Kill '%s' (%d), uid %d, oom_score_adj %d to free %" PRId64 "kB rss, %" PRId64
"kb swap", taskname, pid, uid, procp->oomadj, rss_kb, swap_kb);
}
+ killinfo_log(procp, min_oom_score, rss_kb, swap_kb, ki, mi, wi, tm, pd);
kill_st.uid = static_cast<int32_t>(uid);
kill_st.taskname = taskname;
@@ -2239,7 +2380,8 @@
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj, struct kill_info *ki, union meminfo *mi,
- struct wakeup_info *wi, struct timespec *tm) {
+ struct wakeup_info *wi, struct timespec *tm,
+ struct psi_data *pd) {
int i;
int killed_size = 0;
bool lmk_state_change_start = false;
@@ -2258,12 +2400,12 @@
while (true) {
procp = choose_heaviest_task ?
- proc_get_heaviest(i) : proc_adj_lru(i);
+ proc_get_heaviest(i) : proc_adj_tail(i);
if (!procp)
break;
- killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm);
+ killed_size = kill_one_process(procp, min_score_adj, ki, mi, wi, tm, pd);
if (killed_size >= 0) {
if (!lmk_state_change_start) {
lmk_state_change_start = true;
@@ -2410,7 +2552,6 @@
static int64_t base_file_lru;
static int64_t init_pgscan_kswapd;
static int64_t init_pgscan_direct;
- static int64_t swap_low_threshold;
static bool killing;
static int thrashing_limit = thrashing_limit_pct;
static struct zone_watermarks watermarks;
@@ -2423,6 +2564,7 @@
union meminfo mi;
union vmstat vs;
+ struct psi_data psi_data;
struct timespec curr_tm;
int64_t thrashing = 0;
bool swap_is_low = false;
@@ -2435,8 +2577,10 @@
bool cut_thrashing_limit = false;
int min_score_adj = 0;
int swap_util = 0;
+ int64_t swap_low_threshold;
long since_thrashing_reset_ms;
int64_t workingset_refault_file;
+ bool critical_stall = false;
if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
ALOGE("Failed to get current time");
@@ -2483,10 +2627,10 @@
/* Check free swap levels */
if (swap_free_low_percentage) {
- if (!swap_low_threshold) {
- swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100;
- }
+ swap_low_threshold = mi.field.total_swap * swap_free_low_percentage / 100;
swap_is_low = mi.field.free_swap < swap_low_threshold;
+ } else {
+ swap_low_threshold = 0;
}
/* Identify reclaim state */
@@ -2569,6 +2713,9 @@
/* Find out which watermark is breached if any */
wmark = get_lowest_watermark(&mi, &watermarks);
+ if (!psi_parse_mem(&psi_data)) {
+ critical_stall = psi_data.mem_stats[PSI_FULL].avg10 > (float)stall_limit_critical;
+ }
/*
* TODO: move this logic into a separate function
* Decide if killing a process is necessary and record the reason
@@ -2666,7 +2813,14 @@
.thrashing = (int)thrashing,
.max_thrashing = max_thrashing,
};
- int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm);
+
+ /* Allow killing perceptible apps if the system is stalled */
+ if (critical_stall) {
+ min_score_adj = 0;
+ }
+ psi_parse_io(&psi_data);
+ psi_parse_cpu(&psi_data);
+ int pages_freed = find_and_kill_process(min_score_adj, &ki, &mi, &wi, &curr_tm, &psi_data);
if (pages_freed > 0) {
killing = true;
max_thrashing = 0;
@@ -2708,6 +2862,16 @@
}
}
+// Looks up the path of the cgroup attribute named attr. Logs an error when the lookup fails
+// and returns whatever CgroupGetAttributePath() left in the (initially empty) string.
+static std::string GetCgroupAttributePath(const char* attr) {
+    std::string attr_path;
+    const bool found = CgroupGetAttributePath(attr, &attr_path);
+
+    if (!found) {
+        ALOGE("Unknown cgroup attribute %s", attr);
+    }
+    return attr_path;
+}
+
+// The implementation of this function relies on memcg statistics that are only available in the
+// v1 cgroup hierarchy.
static void mp_event_common(int data, uint32_t events, struct polling_params *poll_params) {
unsigned long long evcount;
int64_t mem_usage, memsw_usage;
@@ -2720,12 +2884,14 @@
long other_free = 0, other_file = 0;
int min_score_adj;
int minfree = 0;
+ static const std::string mem_usage_path = GetCgroupAttributePath("MemUsage");
static struct reread_data mem_usage_file_data = {
- .filename = MEMCG_MEMORY_USAGE,
+ .filename = mem_usage_path.c_str(),
.fd = -1,
};
+ static const std::string memsw_usage_path = GetCgroupAttributePath("MemAndSwapUsage");
static struct reread_data memsw_usage_file_data = {
- .filename = MEMCG_MEMORYSW_USAGE,
+ .filename = memsw_usage_path.c_str(),
.fd = -1,
};
static struct wakeup_info wi;
@@ -2822,7 +2988,7 @@
}
if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
- if (debug_process_killing) {
+ if (debug_process_killing && lowmem_targets_size) {
ALOGI("Ignore %s memory pressure event "
"(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
level_name[level], other_free * page_k, other_file * page_k,
@@ -2886,7 +3052,7 @@
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
- if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm) == 0) {
+ if (find_and_kill_process(level_oomadj[level], NULL, &mi, &wi, &curr_tm, NULL) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@@ -2909,7 +3075,7 @@
min_score_adj = level_oomadj[level];
}
- pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm);
+ pages_freed = find_and_kill_process(min_score_adj, NULL, &mi, &wi, &curr_tm, NULL);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */
@@ -2988,14 +3154,44 @@
mpevfd[level] = -1;
}
+// Which cgroup hierarchy the "memory" controller was found in, as reported by memcg_version().
+enum class MemcgVersion {
+ // no path could be obtained for the "memory" controller
+ kNotFound,
+ // the "memory" controller path differs from the cgroup v2 controller path
+ kV1,
+ // the "memory" controller path equals the cgroup v2 controller path
+ kV2,
+};
+
+// Determines the cgroup hierarchy of the memory controller by comparing its path with the
+// path of the cgroup v2 controller. Use memcg_version() for the cached result.
+static MemcgVersion __memcg_version() {
+    std::string memcg_path;
+    if (!CgroupGetControllerPath("memory", &memcg_path)) {
+        return MemcgVersion::kNotFound;
+    }
+
+    std::string cgroupv2_path;
+    if (!CgroupGetControllerPath(CGROUPV2_CONTROLLER_NAME, &cgroupv2_path)) {
+        // no cgroup v2 controller path to compare against, treated as v1
+        return MemcgVersion::kV1;
+    }
+
+    if (cgroupv2_path == memcg_path) {
+        return MemcgVersion::kV2;
+    }
+    return MemcgVersion::kV1;
+}
+
+// Returns the memcg hierarchy version; the lookup runs once, on the first call, and the
+// result is reused afterwards.
+static MemcgVersion memcg_version() {
+    static MemcgVersion cached_version = __memcg_version();
+    return cached_version;
+}
+
static bool init_psi_monitors() {
/*
* When PSI is used on low-ram devices or on high-end devices without memfree levels
- * use new kill strategy based on zone watermarks, free swap and thrashing stats
+ * use new kill strategy based on zone watermarks, free swap and thrashing stats.
+ * Also use the new strategy if memcg has not been mounted in the v1 cgroups hierarchy since
+ * the old strategy relies on memcg attributes that are available only in the v1 cgroups
+ * hierarchy.
*/
bool use_new_strategy =
GET_LMK_PROPERTY(bool, "use_new_strategy", low_ram_device || !use_minfree_levels);
-
+ if (!use_new_strategy && memcg_version() != MemcgVersion::kV1) {
+ ALOGE("Old kill strategy can only be used with v1 cgroup hierarchy");
+ return false;
+ }
/* In default PSI mode override stall amounts using system properties */
if (use_new_strategy) {
/* Do not use low pressure level */
@@ -3020,6 +3216,13 @@
}
static bool init_mp_common(enum vmpressure_level level) {
+ // The implementation of this function relies on memcg statistics that are only available in the
+ // v1 cgroup hierarchy.
+ if (memcg_version() != MemcgVersion::kV1) {
+ ALOGE("%s: global monitoring is only available for the v1 cgroup hierarchy", __func__);
+ return false;
+ }
+
int mpfd;
int evfd;
int evctlfd;
@@ -3030,13 +3233,13 @@
const char *levelstr = level_name[level_idx];
/* gid containing AID_SYSTEM required */
- mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
+ mpfd = open(GetCgroupAttributePath("MemPressureLevel").c_str(), O_RDONLY | O_CLOEXEC);
if (mpfd < 0) {
ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
goto err_open_mpfd;
}
- evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC);
+ evctlfd = open(GetCgroupAttributePath("CgroupEventControl").c_str(), O_WRONLY | O_CLOEXEC);
if (evctlfd < 0) {
ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
goto err_open_evctlfd;
@@ -3140,6 +3343,63 @@
}
}
+// Closes both ends (read end first, then write end) of the reaper communication pipe.
+static void drop_reaper_comm() {
+    for (int pipe_end : reaper_comm_fd) {
+        close(pipe_end);
+    }
+}
+
+// Creates the pipe used to receive notifications from the reaper and switches its read end
+// to non-blocking mode. On failure the error is logged, any pipe already created is closed
+// and false is returned.
+static bool setup_reaper_comm() {
+    if (pipe(reaper_comm_fd)) {
+        ALOGE("pipe failed: %s", strerror(errno));
+        return false;
+    }
+
+    // Ensure main thread never blocks on read. F_GETFL itself can fail and return -1; that
+    // value must not be OR-ed with O_NONBLOCK and handed to F_SETFL, so check it first.
+    int flags = fcntl(reaper_comm_fd[0], F_GETFL);
+    if (flags < 0 || fcntl(reaper_comm_fd[0], F_SETFL, flags | O_NONBLOCK)) {
+        ALOGE("fcntl failed: %s", strerror(errno));
+        drop_reaper_comm();
+        return false;
+    }
+
+    return true;
+}
+
+// Brings up the reaper: checks that reaping is supported, creates the communication pipe,
+// registers its read end with the main epoll set (handled by kill_fail_handler) and
+// initializes the reaper object with the write end. Everything set up so far is undone when a
+// later step fails. Returns true on success and accounts for the new epoll entry in maxevents.
+static bool init_reaper() {
+    // static because its address is stored in the epoll event data and used after we return
+    static struct event_handler_info fail_hinfo = { 0, kill_fail_handler };
+
+    if (!reaper.is_reaping_supported()) {
+        ALOGI("Process reaping is not supported");
+        return false;
+    }
+
+    if (!setup_reaper_comm()) {
+        ALOGE("Failed to create thread communication channel");
+        return false;
+    }
+
+    // Watch the read end of the pipe from the main loop
+    struct epoll_event ev;
+    ev.events = EPOLLIN;
+    ev.data.ptr = (void *)&fail_hinfo;
+    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, reaper_comm_fd[0], &ev) != 0) {
+        ALOGE("epoll_ctl failed: %s", strerror(errno));
+        drop_reaper_comm();
+        return false;
+    }
+
+    if (reaper.init(reaper_comm_fd[1])) {
+        maxevents++;
+        return true;
+    }
+
+    // Reaper failed to start: unregister the pipe before closing it
+    ALOGE("Failed to initialize reaper object");
+    if (epoll_ctl(epollfd, EPOLL_CTL_DEL, reaper_comm_fd[0], &ev) != 0) {
+        ALOGE("epoll_ctl failed: %s", strerror(errno));
+    }
+    drop_reaper_comm();
+    return false;
+}
+
static int init(void) {
static struct event_handler_info kernel_poll_hinfo = { 0, kernel_event_handler };
struct reread_data file_data = {
@@ -3257,6 +3517,7 @@
struct polling_params *poll_params, uint32_t events) {
struct timespec curr_tm;
+ watchdog.start();
poll_params->update = POLLING_DO_NOT_CHANGE;
handler_info->handler(handler_info->data, events, poll_params);
clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
@@ -3288,6 +3549,7 @@
}
break;
}
+ watchdog.stop();
}
static void mainloop(void) {
@@ -3369,7 +3631,9 @@
if ((evt->events & EPOLLHUP) && evt->data.ptr) {
ALOGI("lmkd data connection dropped");
handler_info = (struct event_handler_info*)evt->data.ptr;
+ watchdog.start();
ctrl_data_close(handler_info->data);
+ watchdog.stop();
}
}
@@ -3454,14 +3718,18 @@
low_ram_device ? DEF_PARTIAL_STALL_LOWRAM : DEF_PARTIAL_STALL);
psi_complete_stall_ms = GET_LMK_PROPERTY(int32, "psi_complete_stall_ms",
DEF_COMPLETE_STALL);
- thrashing_limit_pct = max(0, GET_LMK_PROPERTY(int32, "thrashing_limit",
- low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
+ thrashing_limit_pct =
+ std::max(0, GET_LMK_PROPERTY(int32, "thrashing_limit",
+ low_ram_device ? DEF_THRASHING_LOWRAM : DEF_THRASHING));
thrashing_limit_decay_pct = clamp(0, 100, GET_LMK_PROPERTY(int32, "thrashing_limit_decay",
low_ram_device ? DEF_THRASHING_DECAY_LOWRAM : DEF_THRASHING_DECAY));
- thrashing_critical_pct = max(0, GET_LMK_PROPERTY(int32, "thrashing_limit_critical",
- thrashing_limit_pct * 2));
+ thrashing_critical_pct = std::max(
+ 0, GET_LMK_PROPERTY(int32, "thrashing_limit_critical", thrashing_limit_pct * 2));
swap_util_max = clamp(0, 100, GET_LMK_PROPERTY(int32, "swap_util_max", 100));
filecache_min_kb = GET_LMK_PROPERTY(int64, "filecache_min_kb", 0);
+ stall_limit_critical = GET_LMK_PROPERTY(int64, "stall_limit_critical", 100);
+
+ reaper.enable_debug(debug_process_killing);
}
int main(int argc, char **argv) {
@@ -3503,6 +3771,15 @@
}
}
+ if (init_reaper()) {
+ ALOGI("Process reaper initialized with %d threads in the pool",
+ reaper.thread_cnt());
+ }
+
+ if (!watchdog.init()) {
+ ALOGE("Failed to initialize the watchdog");
+ }
+
mainloop();
}
diff --git a/lmkd.rc b/lmkd.rc
index 6f90bcb..ba662b4 100644
--- a/lmkd.rc
+++ b/lmkd.rc
@@ -5,7 +5,7 @@
capabilities DAC_OVERRIDE KILL IPC_LOCK SYS_NICE SYS_RESOURCE
critical
socket lmkd seqpacket+passcred 0660 system system
- writepid /dev/cpuset/system-background/tasks
+ task_profiles ServiceCapacityLow
on property:lmkd.reinit=1
exec_background /system/bin/lmkd --reinit
diff --git a/reaper.cpp b/reaper.cpp
new file mode 100644
index 0000000..2c9e737
--- /dev/null
+++ b/reaper.cpp
@@ -0,0 +1,253 @@
+/*
+ * Copyright 2021 Google, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "lowmemorykiller"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <log/log.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/epoll.h>
+#include <sys/pidfd.h>
+#include <sys/resource.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <processgroup/processgroup.h>
+#include <system/thread_defs.h>
+
+#include "reaper.h"
+
+#define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
+#define THREAD_POOL_SIZE 2
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease 448
+#endif
+
+// Thin wrapper issuing the process_mrelease syscall by number.
+static int process_mrelease(int pidfd, unsigned int flags) {
+    const long ret = syscall(__NR_process_mrelease, pidfd, flags);
+    return static_cast<int>(ret);
+}
+
+// Returns the time elapsed between 'from' and 'to' in milliseconds.
+static inline long get_time_diff_ms(struct timespec *from,
+                                    struct timespec *to) {
+    const auto sec_part_ms = (to->tv_sec - from->tv_sec) * (long)MS_PER_SEC;
+    const auto nsec_part_ms = (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
+    return sec_part_ms + nsec_part_ms;
+}
+
+// Thread routine started by Reaper::init() for every thread in the pool.
+// 'param' is the owning Reaper. The thread loops forever: it takes a request,
+// sends SIGKILL through the pidfd, asks the kernel to release the target's
+// memory, closes its pidfd and marks the request as complete.
+static void* reaper_main(void* param) {
+    Reaper *reaper = static_cast<Reaper*>(param);
+    struct timespec start_tm, end_tm;
+    struct Reaper::target_proc target;
+    pid_t tid = gettid();
+
+    // Ensure the thread does not use little cores
+    if (!SetTaskProfiles(tid, {"CPUSET_SP_FOREGROUND"}, true)) {
+        ALOGE("Failed to assign cpuset to the reaper thread");
+    }
+
+    for (;;) {
+        target = reaper->dequeue_request();
+
+        // Sample the debug flag once per request so start_tm is guaranteed to
+        // be set whenever the reap duration is reported below.
+        const bool debug = reaper->debug_enabled();
+        if (debug) {
+            clock_gettime(CLOCK_MONOTONIC_COARSE, &start_tm);
+        }
+
+        if (pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0)) {
+            // Inform the main thread about failure to kill
+            reaper->notify_kill_failure(target.pid);
+        } else if (process_mrelease(target.pidfd, 0)) {
+            // Log the pid: the pidfd is a private duplicate and identifies nothing
+            ALOGE("process_mrelease %d failed: %s", target.pid, strerror(errno));
+        } else if (debug) {
+            clock_gettime(CLOCK_MONOTONIC_COARSE, &end_tm);
+            ALOGI("Process %d was reaped in %ldms", target.pid,
+                  get_time_diff_ms(&start_tm, &end_tm));
+        }
+
+        // The pidfd was duplicated for this request and is owned by this thread
+        close(target.pidfd);
+        reaper->request_complete();
+    }
+
+    return NULL;
+}
+
+// Reports whether the kernel implements process_mrelease. The syscall is probed
+// once with an invalid pidfd and the outcome is cached: only ENOSYS counts as
+// "unsupported".
+bool Reaper::is_reaping_supported() {
+    static const bool supported = []() {
+        const bool syscall_missing = process_mrelease(-1, 0) && errno == ENOSYS;
+        return !syscall_missing;
+    }();
+
+    return supported;
+}
+
+// Starts the reaper thread pool.
+// comm_fd: write end of the pipe used to report kill failures.
+// Returns true if at least one thread was started; false if init was already
+// done or no thread could be created.
+bool Reaper::init(int comm_fd) {
+    char name[16];
+
+    if (thread_cnt_ > 0) {
+        // init should not be called multiple times
+        return false;
+    }
+
+    // Set up the state used by reaper threads before any of them starts running
+    comm_fd_ = comm_fd;
+    queue_.reserve(THREAD_POOL_SIZE);
+
+    thread_pool_ = new pthread_t[THREAD_POOL_SIZE];
+    for (int i = 0; i < THREAD_POOL_SIZE; i++) {
+        // pthread functions return the error number and do not set errno
+        int err = pthread_create(&thread_pool_[thread_cnt_], NULL, reaper_main, this);
+        if (err) {
+            ALOGE("pthread_create failed: %s", strerror(err));
+            continue;
+        }
+        snprintf(name, sizeof(name), "lmkd_reaper%d", thread_cnt_);
+        err = pthread_setname_np(thread_pool_[thread_cnt_], name);
+        if (err) {
+            ALOGW("pthread_setname_np failed: %s", strerror(err));
+        }
+        thread_cnt_++;
+    }
+
+    if (!thread_cnt_) {
+        delete[] thread_pool_;
+        // do not keep a dangling pointer or a descriptor the caller will close
+        thread_pool_ = nullptr;
+        comm_fd_ = -1;
+        return false;
+    }
+
+    return true;
+}
+
+// Applies the given task profiles to process 'pid' and then sets priority
+// 'prio' on every thread listed under /proc/<pid>/task. Failures are logged
+// and otherwise ignored.
+static void set_process_group_and_prio(uid_t uid, int pid, const std::vector<std::string>& profiles,
+                                       int prio) {
+    if (!SetProcessProfilesCached(uid, pid, profiles)) {
+        ALOGW("Failed to set task profiles for the process (%d) being killed", pid);
+    }
+
+    char task_dir_path[PATH_MAX];
+    snprintf(task_dir_path, sizeof(task_dir_path), "/proc/%d/task", pid);
+
+    DIR* task_dir = opendir(task_dir_path);
+    if (task_dir == NULL) {
+        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", task_dir_path, errno,
+              pid);
+        return;
+    }
+
+    for (struct dirent* entry = readdir(task_dir); entry != NULL; entry = readdir(task_dir)) {
+        // skip ".", ".." and any other dot entry
+        if (entry->d_name[0] == '.') {
+            continue;
+        }
+
+        const int tid = atoi(entry->d_name);
+        if (tid == 0) {
+            ALOGW("Failed to get t_pid for '%s' of pid(%d)", entry->d_name, pid);
+            continue;
+        }
+
+        // ESRCH is not reported: the thread has simply exited already
+        if (setpriority(PRIO_PROCESS, tid, prio) && errno != ESRCH) {
+            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", tid, errno);
+        }
+    }
+    closedir(task_dir);
+}
+
+// Queues 'target' for one of the reaper threads.
+// Returns true if the request was queued. Returns false when there is no valid
+// pidfd, no reaper threads, all threads are busy or the pidfd could not be
+// duplicated; the caller is then expected to kill the target itself.
+bool Reaper::async_kill(const struct target_proc& target) {
+    if (target.pidfd == -1) {
+        return false;
+    }
+
+    if (!thread_cnt_) {
+        return false;
+    }
+
+    {
+        std::scoped_lock<std::mutex> lock(mutex_);
+
+        if (active_requests_ >= thread_cnt_) {
+            return false;
+        }
+
+        // Duplicate pidfd instead of reusing the original one to avoid synchronization and
+        // refcounting when both reaper and main threads are using or closing the pidfd
+        int pidfd = dup(target.pidfd);
+        if (pidfd < 0) {
+            // Never queue an invalid descriptor; the reaper thread would only fail on it
+            ALOGE("dup failed: %s", strerror(errno));
+            return false;
+        }
+
+        queue_.push_back({ pidfd, target.pid, target.uid });
+        active_requests_++;
+        // Wake up a reaper thread
+        cond_.notify_one();
+    }
+
+    set_process_group_and_prio(target.uid, target.pid,
+                               {"CPUSET_SP_FOREGROUND", "SCHED_SP_FOREGROUND"},
+                               ANDROID_PRIORITY_HIGHEST);
+
+    return true;
+}
+
+// Kills 'target'. Without a pidfd it falls back to kill(2) by pid. Otherwise it
+// first tries to hand the request to a reaper thread unless 'synchronous' is
+// set, and failing that kills and reaps the target on the calling thread.
+// Returns 0 on success or the failing call's result.
+int Reaper::kill(const struct target_proc& target, bool synchronous) {
+    /* CAP_KILL required */
+    if (target.pidfd < 0) {
+        return ::kill(target.pid, SIGKILL);
+    }
+
+    // we assume the kill will be successful and if it fails we will be notified
+    const bool queued = !synchronous && async_kill(target);
+    if (queued) {
+        return 0;
+    }
+
+    const int kill_result = pidfd_send_signal(target.pidfd, SIGKILL, NULL, 0);
+    if (kill_result != 0) {
+        return kill_result;
+    }
+
+    if (!is_reaping_supported()) {
+        return 0;
+    }
+    return process_mrelease(target.pidfd, 0);
+}
+
+// Blocks until a request is queued, then removes and returns the most recently
+// queued one.
+Reaper::target_proc Reaper::dequeue_request() {
+    std::unique_lock<std::mutex> lock(mutex_);
+
+    cond_.wait(lock, [this] { return !queue_.empty(); });
+
+    struct target_proc request = queue_.back();
+    queue_.pop_back();
+    return request;
+}
+
+// Marks one in-flight request as finished.
+void Reaper::request_complete() {
+    std::lock_guard<std::mutex> guard(mutex_);
+    --active_requests_;
+}
+
+// Logs the failed kill and writes 'pid' to the communication pipe.
+void Reaper::notify_kill_failure(int pid) {
+    std::scoped_lock<std::mutex> lock(mutex_);
+
+    ALOGE("Failed to kill process %d", pid);
+    const ssize_t written = TEMP_FAILURE_RETRY(write(comm_fd_, &pid, sizeof(pid)));
+    if (written != sizeof(pid)) {
+        ALOGE("thread communication write failed: %s", strerror(errno));
+    }
+}
diff --git a/reaper.h b/reaper.h
new file mode 100644
index 0000000..e20d892
--- /dev/null
+++ b/reaper.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2021 Google, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+#include <vector>
+
+// Kills processes and releases their memory, optionally on a pool of dedicated
+// threads so that the caller does not have to wait for the memory to be freed.
+class Reaper {
+public:
+    // Describes the process to kill.
+    struct target_proc {
+        int pidfd;
+        int pid;
+        uid_t uid;
+    };
+private:
+    // mutex_ and cond_ are used to wakeup the reaper thread.
+    std::mutex mutex_;
+    std::condition_variable cond_;
+    // mutex_ protects queue_ and active_requests_ access.
+    std::vector<struct target_proc> queue_;
+    int active_requests_;
+    // write side of the pipe to communicate kill failures with the main thread
+    int comm_fd_;
+    int thread_cnt_;
+    pthread_t* thread_pool_;
+    bool debug_enabled_;
+
+    bool async_kill(const struct target_proc& target);
+public:
+    // Every member gets a defined value so that an object which was never
+    // init()-ed holds neither a stray descriptor nor a wild pointer.
+    Reaper()
+        : active_requests_(0),
+          comm_fd_(-1),
+          thread_cnt_(0),
+          thread_pool_(nullptr),
+          debug_enabled_(false) {}
+
+    static bool is_reaping_supported();
+
+    bool init(int comm_fd);
+    int thread_cnt() const { return thread_cnt_; }
+    void enable_debug(bool enable) { debug_enabled_ = enable; }
+    bool debug_enabled() const { return debug_enabled_; }
+
+    // return 0 on success or error code returned by the syscall
+    int kill(const struct target_proc& target, bool synchronous);
+    // below members are used only by reaper_main
+    target_proc dequeue_request();
+    void request_complete();
+    void notify_kill_failure(int pid);
+};
diff --git a/statslog.cpp b/statslog.cpp
index 6568f73..26a6d86 100644
--- a/statslog.cpp
+++ b/statslog.cpp
@@ -30,6 +30,10 @@
#include <time.h>
#include <unistd.h>
+#include <string>
+
+#include <processgroup/processgroup.h>
+
#ifdef LMKD_LOG_STATS
#define STRINGIFY(x) STRINGIFY_INTERNAL(x)
@@ -85,18 +89,20 @@
mem_st->swap_in_bytes = value;
}
-static int memory_stat_from_cgroup(struct memory_stat* mem_st, int pid, uid_t uid) {
- FILE *fp;
- char buf[PATH_MAX];
+static int memory_stat_from_cgroup(struct memory_stat* mem_st, int pid, uid_t uid __unused) {
+ std::string path;
+ if (!CgroupGetAttributePathForTask("MemStats", pid, &path)) {
+ ALOGE("Querying MemStats path failed");
+ return -1;
+ }
- snprintf(buf, sizeof(buf), MEMCG_PROCESS_MEMORY_STAT_PATH, uid, pid);
-
- fp = fopen(buf, "r");
+ FILE* fp = fopen(path.c_str(), "r");
if (fp == NULL) {
return -1;
}
+ char buf[PAGE_SIZE];
while (fgets(buf, PAGE_SIZE, fp) != NULL) {
memory_stat_parse_line(buf, mem_st);
}
diff --git a/statslog.h b/statslog.h
index 89e4d2e..e3f8b72 100644
--- a/statslog.h
+++ b/statslog.h
@@ -92,7 +92,6 @@
#ifdef LMKD_LOG_STATS
-#define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%u/pid_%d/memory.stat"
#define PROC_STAT_FILE_PATH "/proc/%d/stat"
#define PROC_STAT_BUFFER_SIZE 1024
#define BYTES_IN_KILOBYTE 1024
diff --git a/tests/Android.bp b/tests/Android.bp
index dfbe0c7..effdac7 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -23,6 +23,7 @@
"libbase",
"liblog",
"libcutils",
+ "libprocessgroup",
],
static_libs: [
@@ -43,3 +44,34 @@
compile_multilib: "first",
}
+
+cc_test {
+ name: "lmkd_tests",
+ test_suites: ["device-tests"],
+ require_root: true,
+
+ shared_libs: [
+ "libbase",
+ "liblog",
+ "libcutils",
+ "libprocessgroup",
+ ],
+
+ static_libs: [
+ "liblmkd_utils",
+ ],
+
+ target: {
+ android: {
+ srcs: ["lmkd_tests.cpp"],
+ },
+ },
+
+ cflags: [
+ "-Wall",
+ "-Wextra",
+ "-Werror",
+ ],
+
+ compile_multilib: "first",
+}
diff --git a/tests/TEST_MAPPING b/tests/TEST_MAPPING
new file mode 100644
index 0000000..7c2533b
--- /dev/null
+++ b/tests/TEST_MAPPING
@@ -0,0 +1,7 @@
+{
+ "presubmit-large": [
+ {
+ "name": "lmkd_tests"
+ }
+ ]
+}
diff --git a/tests/lmkd_tests.cpp b/tests/lmkd_tests.cpp
new file mode 100644
index 0000000..0676d85
--- /dev/null
+++ b/tests/lmkd_tests.cpp
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sstream>
+#include <string>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <cutils/properties.h>
+#include <gtest/gtest.h>
+#include <liblmkd_utils.h>
+#include <log/log_properties.h>
+#include <private/android_filesystem_config.h>
+
+using namespace android::base;
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease 448
+#endif
+
+#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
+
+#define LMKD_LOGCAT_MARKER "lowmemorykiller"
+#define LMKD_KILL_TEMPLATE "Kill \'[^']*\' \\\(%d\\)"
+#define LMKD_REAP_TEMPLATE "Process %d was reaped"
+
+#define LMKD_KILL_LINE_START LMKD_LOGCAT_MARKER ": Kill"
+#define LMKD_REAP_LINE_START LMKD_LOGCAT_MARKER ": Process"
+#define LMKD_REAP_TIME_TEMPLATE LMKD_LOGCAT_MARKER ": Process %d was reaped in %ldms"
+
+#define ONE_MB (1 << 20)
+
+// Test constant parameters
+#define OOM_ADJ_MAX 1000
+#define ALLOC_STEP (5 * ONE_MB)
+#define ALLOC_DELAY 200
+
+// used to create ptr aliasing and prevent compiler optimizing the access
+static volatile void* gptr;
+
+// Fixture for tests that talk to a running lmkd over its control socket.
+class LmkdTest : public ::testing::Test {
+  public:
+    // Skips the test unless its requirements are met, then connects to lmkd
+    // and makes sure ro.lmk.debug is enabled.
+    virtual void SetUp() {
+        // test requirements
+        if (getuid() != static_cast<unsigned>(AID_ROOT)) {
+            GTEST_SKIP() << "Must be root, skipping test";
+        }
+
+        if (!__android_log_is_debuggable()) {
+            GTEST_SKIP() << "Must be userdebug build, skipping test";
+        }
+
+        if (!access(INKERNEL_MINFREE_PATH, W_OK)) {
+            GTEST_SKIP() << "Must not have kernel lowmemorykiller driver,"
+                         << " skipping test";
+        }
+
+        // should be able to turn on lmkd debug information
+        if (!property_get_bool("ro.lmk.debug", true)) {
+            GTEST_SKIP() << "Can't run with ro.lmk.debug property set to 'false', skipping test";
+        }
+
+        // setup lmkd connection
+        ASSERT_FALSE((sock = lmkd_connect()) < 0)
+                << "Failed to connect to lmkd process, err=" << strerror(errno);
+
+        // enable ro.lmk.debug if not already enabled
+        if (!property_get_bool("ro.lmk.debug", false)) {
+            EXPECT_EQ(property_set("ro.lmk.debug", "true"), 0);
+            EXPECT_EQ(lmkd_update_props(sock), UPDATE_PROPS_SUCCESS)
+                    << "Failed to reinitialize lmkd";
+        }
+
+        uid = getuid();
+    }
+
+    virtual void TearDown() {
+        // drop lmkd connection
+        close(sock);
+    }
+
+    // Passes pid/uid/oomadj of the target process to lmkd_register_proc() and,
+    // on low-RAM configurations, creates a memcg for it.
+    void SetupChild(pid_t pid, int oomadj) {
+        struct lmk_procprio params;
+
+        params.pid = pid;
+        params.uid = uid;
+        params.oomadj = oomadj;
+        params.ptype = PROC_TYPE_APP;
+        ASSERT_FALSE(lmkd_register_proc(sock, &params) < 0)
+                << "Failed to communicate with lmkd, err=" << strerror(errno);
+        GTEST_LOG_(INFO) << "Target process " << pid << " launched";
+        if (property_get_bool("ro.config.low_ram", false)) {
+            ASSERT_FALSE(create_memcg(uid, pid) != 0)
+                    << "Target process " << pid << " failed to create a cgroup";
+        }
+    }
+
+    // Runs 'command' through popen() and returns everything it wrote to stdout.
+    // Returns an empty string if the command could not be started.
+    static std::string ExecCommand(const std::string& command) {
+        std::string content;
+        FILE* fp = popen(command.c_str(), "r");
+        if (fp == NULL) {
+            // fileno(NULL) would crash; report no output instead
+            return content;
+        }
+        ReadFdToString(fileno(fp), &content);
+        pclose(fp);
+        return content;
+    }
+
+    // Dumps logcat, optionally limited to 'tag' (-s) and to lines matching 'regex' (-e).
+    static std::string ReadLogcat(const std::string& tag, const std::string& regex) {
+        std::string cmd = "logcat -d -b all";
+        if (!tag.empty()) {
+            cmd += " -s \"" + tag + "\"";
+        }
+        if (!regex.empty()) {
+            cmd += " -e \"" + regex + "\"";
+        }
+        return ExecCommand(cmd);
+    }
+
+    // Maps and dirties anonymous memory in 'step_size' chunks, calling
+    // usleep(step_delay) after every attempt, until 'total_size' bytes are
+    // allocated. Returns the number of bytes allocated.
+    static size_t ConsumeMemory(size_t total_size, size_t step_size, size_t step_delay) {
+        volatile void* ptr;
+        size_t allocated_size = 0;
+
+        while (allocated_size < total_size) {
+            // portable anonymous mappings pass -1 as the file descriptor
+            ptr = mmap(NULL, step_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+            if (ptr != MAP_FAILED) {
+                // create ptr aliasing to prevent compiler optimizing the access
+                gptr = ptr;
+                // make data non-zero
+                memset((void*)ptr, (int)(allocated_size + 1), step_size);
+                allocated_size += step_size;
+            }
+            usleep(step_delay);
+        }
+        return allocated_size;
+    }
+
+    // Extracts the rss and swap sizes (kB) from a kill report line.
+    static bool ParseProcSize(const std::string& line, long& rss, long& swap) {
+        size_t pos = line.find("to free");
+        if (pos == std::string::npos) {
+            return false;
+        }
+        return sscanf(line.c_str() + pos, "to free %ldkB rss, %ldkB swap", &rss, &swap) == 2;
+    }
+
+    // Extracts the reap duration (ms) from a reap report line; succeeds only if
+    // the line refers to 'pid'.
+    static bool ParseReapTime(const std::string& line, pid_t pid, long& reap_time) {
+        int reap_pid;
+        return sscanf(line.c_str(), LMKD_REAP_TIME_TEMPLATE, &reap_pid, &reap_time) == 2 &&
+               reap_pid == pid;
+    }
+
+  private:
+    int sock;
+    uid_t uid;
+};
+
+// Forks a child that allocates memory until it is killed, then checks that
+// logcat contains both the kill report and the reap-time report for the child
+// and logs the resulting reclaim speed.
+TEST_F(LmkdTest, TargetReaping) {
+    // test specific requirements
+    if (syscall(__NR_process_mrelease, -1, 0) && errno == ENOSYS) {
+        GTEST_SKIP() << "Must support process_mrelease syscall, skipping test";
+    }
+
+    // for a child to act as a target process
+    pid_t pid = fork();
+    ASSERT_FALSE(pid < 0) << "Failed to spawn a child process, err=" << strerror(errno);
+    if (pid != 0) {
+        // parent
+        waitpid(pid, NULL, 0);
+    } else {
+        // child
+        SetupChild(getpid(), OOM_ADJ_MAX);
+        // allocate memory until killed
+        ConsumeMemory((size_t)-1, ALLOC_STEP, ALLOC_DELAY);
+        // should not reach here, child should be killed by OOM
+        // (fork() returned 0 here, so report the child's real pid)
+        FAIL() << "Target process " << getpid() << " was not killed";
+    }
+
+    std::string regex =
+            StringPrintf("((" LMKD_KILL_TEMPLATE ")|(" LMKD_REAP_TEMPLATE "))", pid, pid);
+    std::string logcat_out = ReadLogcat(LMKD_LOGCAT_MARKER ":I", regex);
+
+    // find kill report
+    size_t line_start = logcat_out.find(LMKD_KILL_LINE_START);
+    ASSERT_TRUE(line_start != std::string::npos) << "Kill report is not found";
+    size_t line_end = logcat_out.find('\n', line_start);
+    std::string line = logcat_out.substr(
+            line_start, line_end == std::string::npos ? std::string::npos : line_end - line_start);
+    long rss, swap;
+    ASSERT_TRUE(ParseProcSize(line, rss, swap)) << "Kill report format is invalid";
+
+    // find reap duration report
+    line_start = logcat_out.find(LMKD_REAP_LINE_START, line_end);
+    ASSERT_TRUE(line_start != std::string::npos) << "Reaping time report is not found";
+    line_end = logcat_out.find('\n', line_start);
+    line = logcat_out.substr(
+            line_start, line_end == std::string::npos ? std::string::npos : line_end - line_start);
+    long reap_time;
+    // reap_time must be positive because it is used as a divisor below
+    ASSERT_TRUE(ParseReapTime(line, pid, reap_time) && reap_time > 0)
+            << "Reaping time report format is invalid";
+
+    double reclaim_speed = ((double)rss + swap) / reap_time;
+    GTEST_LOG_(INFO) << "Reclaim speed " << reclaim_speed << "kB/ms (" << rss << "kB rss + " << swap
+                     << "kB swap) / " << reap_time << "ms";
+}
+
+// Test entry point: initializes gtest and logging to stderr, then runs all tests.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    InitLogging(argv, StderrLogger);
+
+    const int status = RUN_ALL_TESTS();
+    return status;
+}
diff --git a/watchdog.cpp b/watchdog.cpp
new file mode 100644
index 0000000..b1e4a03
--- /dev/null
+++ b/watchdog.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2021 Google, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "lowmemorykiller"
+
+#include <errno.h>
+#include <log/log.h>
+#include <string.h>
+
+#include <processgroup/processgroup.h>
+
+#include "watchdog.h"
+
+// Thread routine started by Watchdog::init(). 'param' is the owning Watchdog.
+// Creates the watchdog timer on this thread, then waits for its signal forever
+// and calls bite() each time the signal arrives.
+static void* watchdog_main(void* param) {
+    Watchdog *watchdog = static_cast<Watchdog*>(param);
+    sigset_t sigset;
+    int signum;
+
+    // Ensure the thread does not use little cores
+    if (!SetTaskProfiles(gettid(), {"CPUSET_SP_FOREGROUND"}, true)) {
+        ALOGE("Failed to assign cpuset to the watchdog thread");
+    }
+
+    if (!watchdog->create_timer(sigset)) {
+        ALOGE("Watchdog timer creation failed!");
+        return NULL;
+    }
+
+    while (true) {
+        // sigwait() returns the error number on failure; it never returns -1
+        // and does not have to set errno
+        int err = sigwait(&sigset, &signum);
+        if (err) {
+            ALOGE("sigwait failed: %s", strerror(err));
+            // no signal was received, so the watchdog did not expire
+            continue;
+        }
+
+        watchdog->bite();
+    }
+
+    return NULL;
+}
+
+// Starts the watchdog thread. Returns false if the thread could not be created;
+// a failure to name the thread is only logged.
+bool Watchdog::init() {
+    pthread_t thread;
+
+    // pthread functions return the error number and do not set errno
+    int err = pthread_create(&thread, NULL, watchdog_main, this);
+    if (err) {
+        ALOGE("pthread_create failed: %s", strerror(err));
+        return false;
+    }
+    err = pthread_setname_np(thread, "lmkd_watchdog");
+    if (err) {
+        ALOGW("pthread_setname_np failed: %s", strerror(err));
+    }
+
+    return true;
+}
+
+// Arms the watchdog timer with timeout_ seconds for both the first expiration
+// and the repeat interval. Returns false if the timer does not exist yet or
+// could not be set.
+bool Watchdog::start() {
+    if (!timer_created_) {
+        return false;
+    }
+
+    // Start the timer and keep it active until it's disarmed
+    struct itimerspec spec;
+    spec.it_interval.tv_sec = timeout_;
+    spec.it_interval.tv_nsec = 0;
+    spec.it_value = spec.it_interval;
+
+    if (timer_settime(timer_, 0, &spec, NULL) != 0) {
+        ALOGE("timer_settime failed: %s", strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+// Disarms the watchdog timer by setting an all-zero timer value. Returns false
+// if the timer does not exist yet or could not be set.
+bool Watchdog::stop() {
+    if (!timer_created_) {
+        return false;
+    }
+
+    struct itimerspec disarm = {};
+    const int rc = timer_settime(timer_, 0, &disarm, NULL);
+    if (rc != 0) {
+        ALOGE("timer_settime failed: %s", strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+// Blocks SIGALRM for the calling thread and creates a CLOCK_MONOTONIC timer
+// that delivers SIGALRM to that same thread.
+// sigset: filled with the set containing SIGALRM, for the caller to wait on.
+// Returns true on success; on failure the error is logged and false returned.
+bool Watchdog::create_timer(sigset_t &sigset) {
+    // zero-initialize so no indeterminate field is handed to timer_create()
+    struct sigevent sevent = {};
+
+    sigemptyset(&sigset);
+    sigaddset(&sigset, SIGALRM);
+    // This runs on a secondary thread; sigprocmask() is unspecified in a
+    // multi-threaded process, pthread_sigmask() is the per-thread equivalent.
+    // It returns the error number instead of setting errno.
+    int err = pthread_sigmask(SIG_BLOCK, &sigset, NULL);
+    if (err) {
+        ALOGE("pthread_sigmask failed: %s", strerror(err));
+        return false;
+    }
+
+    sevent.sigev_notify = SIGEV_THREAD_ID;
+    sevent.sigev_notify_thread_id = gettid();
+    sevent.sigev_signo = SIGALRM;
+    if (timer_create(CLOCK_MONOTONIC, &sevent, &timer_)) {
+        ALOGE("timer_create failed: %s", strerror(errno));
+        return false;
+    }
+
+    timer_created_ = true;
+    return true;
+}
diff --git a/watchdog.h b/watchdog.h
new file mode 100644
index 0000000..34cb602
--- /dev/null
+++ b/watchdog.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2021 Google, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <signal.h>
+#include <time.h>
+
+// Timer-based watchdog: start() arms the timer, stop() disarms it, and the
+// callback supplied at construction is what bite() invokes.
+class Watchdog {
+private:
+    // timeout in seconds
+    int timeout_;
+    timer_t timer_;
+    // set once create_timer() has succeeded
+    std::atomic<bool> timer_created_;
+    // may be null, in which case bite() does nothing
+    void (*callback_)();
+public:
+    Watchdog(int timeout, void (*callback)())
+        : timeout_(timeout), timer_created_(false), callback_(callback) {}
+
+    bool init();
+    bool start();
+    bool stop();
+    // used by the watchdog_main
+    bool create_timer(sigset_t &sigset);
+    void bite() const {
+        if (!callback_) {
+            return;
+        }
+        callback_();
+    }
+};