/* drivers/misc/lowmemorykiller.c
*
* The lowmemorykiller driver lets user-space specify a set of memory thresholds
* where processes with a range of oom_score_adj values will get killed. Specify
* the minimum oom_score_adj values in
* /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
 * /sys/module/lowmemorykiller/parameters/minfree. Both files take a
 * comma-separated list of numbers in ascending order.
*
* For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
* "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
 * processes with an oom_score_adj value of 8 or higher when the free memory
 * drops below 4096 pages, and kill processes with an oom_score_adj value of
 * 0 or higher when the free memory drops below 1024 pages.
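 * (With the common 4 KiB page size, 1024 pages is 4 MiB and 4096 pages
 * is 16 MiB.) As a shell sketch of that same example configuration:
 *
 *   echo "0,8" > /sys/module/lowmemorykiller/parameters/adj
 *   echo "1024,4096" > /sys/module/lowmemorykiller/parameters/minfree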
*
 * The driver considers memory used for caches to be free, but if a large
 * percentage of the cached memory is locked, this can be very inaccurate,
 * and processes may not get killed until the normal oom killer is triggered.
*
* Copyright (C) 2007-2008 Google, Inc.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/oom.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/swap.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/cpuset.h>
static uint32_t lowmem_debug_level = 1;
static int lowmem_adj[6] = {
0,
1,
6,
12,
};
static int lowmem_adj_size = 4;
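/* Default thresholds, in pages; the size comments assume 4 KiB pages. */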
static int lowmem_minfree[6] = {
3 * 512, /* 6MB */
2 * 1024, /* 8MB */
4 * 1024, /* 16MB */
16 * 1024, /* 64MB */
};
static int lowmem_minfree_size = 4;
static int lmk_fast_run = 1;
static unsigned long lowmem_deathpending_timeout;
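/*
 * Verbosity-gated printk: a message is emitted only when
 * lowmem_debug_level is at least the given level (1 = kill reports,
 * 3 = candidate selection, 4-5 = tracing).
 */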
#define lowmem_print(level, x...) \
do { \
if (lowmem_debug_level >= (level)) \
printk(x); \
} while (0)
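/*
 * Return 1 if any thread in @p's thread group has @flag set in its
 * thread_info flags, 0 otherwise.
 */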
static int test_task_flag(struct task_struct *p, int flag)
{
struct task_struct *t = p;
do {
task_lock(t);
if (test_tsk_thread_flag(t, flag)) {
task_unlock(t);
return 1;
}
task_unlock(t);
} while_each_thread(p, t);
return 0;
}
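/*
 * An allocation can use CMA pages when its migratetype is MIGRATE_CMA
 * or when MIGRATE_CMA appears in that migratetype's fallback list
 * (which is terminated by MIGRATE_RESERVE).
 */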
int can_use_cma_pages(gfp_t gfp_mask)
{
int can_use = 0;
int mtype = allocflags_to_migratetype(gfp_mask);
int i = 0;
int *mtype_fallbacks = get_migratetype_fallbacks(mtype);
if (is_migrate_cma(mtype)) {
can_use = 1;
} else {
for (i = 0;; i++) {
int fallbacktype = mtype_fallbacks[i];
if (is_migrate_cma(fallbacktype)) {
can_use = 1;
break;
}
if (fallbacktype == MIGRATE_RESERVE)
break;
}
}
return can_use;
}
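/*
 * Walk the zonelist and subtract from @other_free/@other_file the pages
 * the allocation could not actually use: everything in zones above
 * @classzone_idx, the lowmem reserves of zones below it, and, when CMA
 * pages are unusable, free CMA pages.
 */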
void tune_lmk_zone_param(struct zonelist *zonelist, int classzone_idx,
int *other_free, int *other_file,
int use_cma_pages)
{
struct zone *zone;
struct zoneref *zoneref;
int zone_idx;
for_each_zone_zonelist(zone, zoneref, zonelist, MAX_NR_ZONES) {
		zone_idx = zonelist_zone_idx(zoneref);
		if (zone_idx == ZONE_MOVABLE) {
			if (!use_cma_pages)
				*other_free -=
				    zone_page_state(zone, NR_FREE_CMA_PAGES);
			continue;
		}
if (zone_idx > classzone_idx) {
if (other_free != NULL)
*other_free -= zone_page_state(zone,
NR_FREE_PAGES);
if (other_file != NULL)
*other_file -= zone_page_state(zone,
NR_FILE_PAGES)
- zone_page_state(zone, NR_SHMEM);
} else if (zone_idx < classzone_idx) {
if (zone_watermark_ok(zone, 0, 0, classzone_idx, 0)) {
if (!use_cma_pages) {
*other_free -= min(
zone->lowmem_reserve[classzone_idx] +
zone_page_state(
zone, NR_FREE_CMA_PAGES),
zone_page_state(
zone, NR_FREE_PAGES));
} else {
*other_free -=
zone->lowmem_reserve[classzone_idx];
}
} else {
*other_free -=
zone_page_state(zone, NR_FREE_PAGES);
}
}
}
}
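/*
 * When running as kswapd, widen the allocation mask with __GFP_HIGHMEM
 * if the preferred zone is already balanced, so that highmem is counted
 * as usable by the tuning below.
 */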
#ifdef CONFIG_HIGHMEM
void adjust_gfp_mask(gfp_t *gfp_mask)
{
struct zone *preferred_zone;
struct zonelist *zonelist;
enum zone_type high_zoneidx;
if (current_is_kswapd()) {
zonelist = node_zonelist(0, *gfp_mask);
high_zoneidx = gfp_zone(*gfp_mask);
first_zones_zonelist(zonelist, high_zoneidx, NULL,
&preferred_zone);
if (high_zoneidx == ZONE_NORMAL) {
if (zone_watermark_ok_safe(preferred_zone, 0,
high_wmark_pages(preferred_zone), 0,
0))
*gfp_mask |= __GFP_HIGHMEM;
} else if (high_zoneidx == ZONE_HIGHMEM) {
*gfp_mask |= __GFP_HIGHMEM;
}
}
}
#else
void adjust_gfp_mask(gfp_t *unused)
{
}
#endif
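/*
 * Reduce the raw global free/file page counts to what the allocation
 * described by @sc could actually use, based on its gfp_mask and the
 * resulting classzone.
 */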
void tune_lmk_param(int *other_free, int *other_file, struct shrink_control *sc)
{
gfp_t gfp_mask;
struct zone *preferred_zone;
struct zonelist *zonelist;
enum zone_type high_zoneidx, classzone_idx;
unsigned long balance_gap;
int use_cma_pages;
gfp_mask = sc->gfp_mask;
adjust_gfp_mask(&gfp_mask);
zonelist = node_zonelist(0, gfp_mask);
high_zoneidx = gfp_zone(gfp_mask);
first_zones_zonelist(zonelist, high_zoneidx, NULL, &preferred_zone);
classzone_idx = zone_idx(preferred_zone);
use_cma_pages = can_use_cma_pages(gfp_mask);
balance_gap = min(low_wmark_pages(preferred_zone),
(preferred_zone->present_pages +
KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
KSWAPD_ZONE_BALANCE_GAP_RATIO);
if (likely(current_is_kswapd() && zone_watermark_ok(preferred_zone, 0,
high_wmark_pages(preferred_zone) + SWAP_CLUSTER_MAX +
balance_gap, 0, 0))) {
if (lmk_fast_run)
tune_lmk_zone_param(zonelist, classzone_idx, other_free,
other_file, use_cma_pages);
else
tune_lmk_zone_param(zonelist, classzone_idx, other_free,
NULL, use_cma_pages);
if (zone_watermark_ok(preferred_zone, 0, 0, ZONE_HIGHMEM, 0)) {
if (!use_cma_pages) {
*other_free -= min(
preferred_zone->lowmem_reserve[ZONE_HIGHMEM]
+ zone_page_state(
preferred_zone, NR_FREE_CMA_PAGES),
zone_page_state(
preferred_zone, NR_FREE_PAGES));
} else {
*other_free -=
preferred_zone->lowmem_reserve[ZONE_HIGHMEM];
}
} else {
*other_free -= zone_page_state(preferred_zone,
NR_FREE_PAGES);
}
		lowmem_print(4, "lowmem_shrink of kswapd tuning for highmem "
			"ofree %d, %d\n", *other_free, *other_file);
} else {
tune_lmk_zone_param(zonelist, classzone_idx, other_free,
other_file, use_cma_pages);
if (!use_cma_pages) {
*other_free -=
zone_page_state(preferred_zone, NR_FREE_CMA_PAGES);
}
		lowmem_print(4, "lowmem_shrink tuning for others ofree %d, "
			"%d\n", *other_free, *other_file);
}
}
static DEFINE_MUTEX(scan_mutex);
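/*
 * Shrinker callback.  With sc->nr_to_scan == 0 this is only a query and
 * just reports the reclaimable LRU size.  Otherwise pick the smallest
 * minfree threshold that both adjusted counts fall below, map it to a
 * minimum oom_score_adj, and SIGKILL the task with the highest
 * oom_score_adj at or above it (ties broken by larger RSS).
 */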
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
struct task_struct *tsk;
struct task_struct *selected = NULL;
int rem = 0;
int tasksize;
int i;
int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
int minfree = 0;
int selected_tasksize = 0;
int selected_oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
int other_free;
int other_file;
unsigned long nr_to_scan = sc->nr_to_scan;
if (nr_to_scan > 0) {
if (mutex_lock_interruptible(&scan_mutex) < 0)
return 0;
}
other_free = global_page_state(NR_FREE_PAGES);
other_file = global_page_state(NR_FILE_PAGES) -
global_page_state(NR_SHMEM);
tune_lmk_param(&other_free, &other_file, sc);
if (lowmem_adj_size < array_size)
array_size = lowmem_adj_size;
if (lowmem_minfree_size < array_size)
array_size = lowmem_minfree_size;
for (i = 0; i < array_size; i++) {
minfree = lowmem_minfree[i];
if (other_free < minfree && other_file < minfree) {
min_score_adj = lowmem_adj[i];
break;
}
}
if (nr_to_scan > 0)
lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
nr_to_scan, sc->gfp_mask, other_free,
other_file, min_score_adj);
rem = global_page_state(NR_ACTIVE_ANON) +
global_page_state(NR_ACTIVE_FILE) +
global_page_state(NR_INACTIVE_ANON) +
global_page_state(NR_INACTIVE_FILE);
	if (nr_to_scan == 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
nr_to_scan, sc->gfp_mask, rem);
if (nr_to_scan > 0)
mutex_unlock(&scan_mutex);
return rem;
}
selected_oom_score_adj = min_score_adj;
rcu_read_lock();
for_each_process(tsk) {
struct task_struct *p;
int oom_score_adj;
if (tsk->flags & PF_KTHREAD)
continue;
/* if task no longer has any memory ignore it */
if (test_task_flag(tsk, TIF_MM_RELEASED))
continue;
if (time_before_eq(jiffies, lowmem_deathpending_timeout)) {
if (test_task_flag(tsk, TIF_MEMDIE)) {
rcu_read_unlock();
/* give the system time to free up the memory */
msleep_interruptible(20);
mutex_unlock(&scan_mutex);
return 0;
}
}
p = find_lock_task_mm(tsk);
if (!p)
continue;
oom_score_adj = p->signal->oom_score_adj;
if (oom_score_adj < min_score_adj) {
task_unlock(p);
continue;
}
tasksize = get_mm_rss(p->mm);
task_unlock(p);
if (tasksize <= 0)
continue;
if (selected) {
if (oom_score_adj < selected_oom_score_adj)
continue;
if (oom_score_adj == selected_oom_score_adj &&
tasksize <= selected_tasksize)
continue;
}
selected = p;
selected_tasksize = tasksize;
selected_oom_score_adj = oom_score_adj;
		lowmem_print(3, "select '%s' (%d), adj %d, size %d, to kill\n",
			     p->comm, p->pid, oom_score_adj, tasksize);
}
if (selected) {
		lowmem_print(1, "Killing '%s' (%d), adj %d,\n"
			" to free %ldkB on behalf of '%s' (%d) because\n"
			" cache %ldkB is below limit %ldkB for oom_score_adj %d\n"
			" Free memory is %ldkB above reserved.\n"
			" Free CMA is %ldkB\n"
			" Total reserve is %ldkB\n"
			" Total free pages is %ldkB\n"
			" Total file cache is %ldkB\n"
			" GFP mask is 0x%x\n",
			selected->comm, selected->pid,
			selected_oom_score_adj,
			selected_tasksize * (long)(PAGE_SIZE / 1024),
			current->comm, current->pid,
			other_file * (long)(PAGE_SIZE / 1024),
			minfree * (long)(PAGE_SIZE / 1024),
			min_score_adj,
			other_free * (long)(PAGE_SIZE / 1024),
			global_page_state(NR_FREE_CMA_PAGES) *
				(long)(PAGE_SIZE / 1024),
			totalreserve_pages * (long)(PAGE_SIZE / 1024),
			global_page_state(NR_FREE_PAGES) *
				(long)(PAGE_SIZE / 1024),
			global_page_state(NR_FILE_PAGES) *
				(long)(PAGE_SIZE / 1024),
			sc->gfp_mask);
if (lowmem_debug_level >= 2 && selected_oom_score_adj == 0) {
show_mem(SHOW_MEM_FILTER_NODES);
dump_tasks(NULL, NULL);
}
lowmem_deathpending_timeout = jiffies + HZ;
send_sig(SIGKILL, selected, 0);
set_tsk_thread_flag(selected, TIF_MEMDIE);
rem -= selected_tasksize;
rcu_read_unlock();
/* give the system time to free up the memory */
msleep_interruptible(20);
	} else {
		rcu_read_unlock();
	}
lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
nr_to_scan, sc->gfp_mask, rem);
mutex_unlock(&scan_mutex);
return rem;
}
static struct shrinker lowmem_shrinker = {
.shrink = lowmem_shrink,
.seeks = DEFAULT_SEEKS * 16
};
static int __init lowmem_init(void)
{
register_shrinker(&lowmem_shrinker);
return 0;
}
static void __exit lowmem_exit(void)
{
unregister_shrinker(&lowmem_shrinker);
}
#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
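/*
 * Map a legacy oom_adj value (-17..15) onto the oom_score_adj scale
 * (-1000..1000): oom_score_adj = oom_adj * OOM_SCORE_ADJ_MAX / -OOM_DISABLE,
 * i.e. oom_adj * 1000 / 17, with the maximum mapped exactly.  For
 * example, oom_adj 8 becomes 8 * 1000 / 17 = 470.
 */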
static int lowmem_oom_adj_to_oom_score_adj(int oom_adj)
{
if (oom_adj == OOM_ADJUST_MAX)
return OOM_SCORE_ADJ_MAX;
else
return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
}
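/*
 * Heuristic: if the largest value in the adj array still fits in the
 * legacy oom_adj range (<= OOM_ADJUST_MAX) but would map above that
 * range, assume user space wrote old-style oom_adj values and convert
 * the whole array in place.
 */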
static void lowmem_autodetect_oom_adj_values(void)
{
int i;
int oom_adj;
int oom_score_adj;
int array_size = ARRAY_SIZE(lowmem_adj);
if (lowmem_adj_size < array_size)
array_size = lowmem_adj_size;
if (array_size <= 0)
return;
oom_adj = lowmem_adj[array_size - 1];
if (oom_adj > OOM_ADJUST_MAX)
return;
oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
if (oom_score_adj <= OOM_ADJUST_MAX)
return;
lowmem_print(1, "lowmem_shrink: convert oom_adj to oom_score_adj:\n");
for (i = 0; i < array_size; i++) {
oom_adj = lowmem_adj[i];
oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj);
lowmem_adj[i] = oom_score_adj;
lowmem_print(1, "oom_adj %d => oom_score_adj %d\n",
oom_adj, oom_score_adj);
}
}
static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp)
{
int ret;
ret = param_array_ops.set(val, kp);
/* HACK: Autodetect oom_adj values in lowmem_adj array */
lowmem_autodetect_oom_adj_values();
return ret;
}
static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp)
{
return param_array_ops.get(buffer, kp);
}
static void lowmem_adj_array_free(void *arg)
{
param_array_ops.free(arg);
}
static const struct kernel_param_ops lowmem_adj_array_ops = {
.set = lowmem_adj_array_set,
.get = lowmem_adj_array_get,
.free = lowmem_adj_array_free,
};
static const struct kparam_array __param_arr_adj = {
.max = ARRAY_SIZE(lowmem_adj),
.num = &lowmem_adj_size,
.ops = &param_ops_int,
.elemsize = sizeof(lowmem_adj[0]),
.elem = lowmem_adj,
};
#endif
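/* Tunables exposed under /sys/module/lowmemorykiller/parameters/ */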
module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES
__module_param_call(MODULE_PARAM_PREFIX, adj,
&lowmem_adj_array_ops,
.arr = &__param_arr_adj,
S_IRUGO | S_IWUSR, -1);
__MODULE_PARM_TYPE(adj, "array of int");
#else
module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size,
S_IRUGO | S_IWUSR);
#endif
module_param_array_named(minfree, lowmem_minfree, int, &lowmem_minfree_size,
			 S_IRUGO | S_IWUSR);
module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
module_param_named(lmk_fast_run, lmk_fast_run, int, S_IRUGO | S_IWUSR);
module_init(lowmem_init);
module_exit(lowmem_exit);
MODULE_LICENSE("GPL");