 /* mm/zcache.c - kernel/msm.git - Git at Google */

 /*
  * linux/mm/zcache.c
  *
  * A cleancache backend for file pages compression.
  * Concepts based on original zcache by Dan Magenheimer.
  * Copyright (C) 2013  Bob Liu <bob.liu@xxxxxxxxxx>
  *
  * With zcache, active file pages can be compressed in memory during page
  * reclaiming. When their data is needed again the I/O reading operation is
  * avoided. This results in a significant performance gain under memory pressure
  * for systems with many file pages.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  * of the License, or (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
 */

 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

 #include <linux/atomic.h>
 #include <linux/cleancache.h>
 #include <linux/cpu.h>
 #include <linux/crypto.h>
 #include <linux/page-flags.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/mm_types.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/radix-tree.h>
 #include <linux/rbtree.h>
 #include <linux/types.h>
 #include <linux/zbud.h>

 /*
  * Enable/disable zcache (disabled by default).
  * init_zcache() returns early without setting anything up when this is
  * false.
  */
 static bool zcache_enabled __read_mostly;
 module_param_named(enabled, zcache_enabled, bool, 0);

 /*
  * Compressor to be used by zcache.
  * zcache_comp_init() falls back to ZCACHE_COMPRESSOR_DEFAULT when the
  * requested compressor is not available.
  */
 #define ZCACHE_COMPRESSOR_DEFAULT "lzo"
 static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
 module_param_named(compressor, zcache_compressor, charp, 0);

 /*
  * The maximum percentage of memory that the compressed pool can occupy.
  */
 static unsigned int zcache_max_pool_percent = 10;
 module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);

 /*
  * Low-water mark for file pages, as a percentage of totalram_pages.
  * While NR_FILE_PAGES is below it, zcache_is_full() reports the pool as full
  * and zcache_scan() may target the whole pool for reclaim.
  */
 static unsigned int zcache_clear_percent = 4;
 module_param_named(clear_percent, zcache_clear_percent, uint, 0644);
 /*
  * zcache statistics
  *
  * The u64 counters are plain variables incremented without any locking in
  * this file; the two atomic_t counters track currently stored entries.
  */
 /* zcache_store_page() found zcache_is_full() to be true */
 static u64 zcache_pool_limit_hit;
 /* zcache_store_zaddr() replaced an entry that already existed */
 static u64 zcache_dup_entry;
 /* zbud_alloc() failed in zcache_store_page() */
 static u64 zcache_zbud_alloc_fail;
 /* Entries freed by the zbud evict callback (zcache_evict_zpage()) */
 static u64 zcache_evict_zpages;
 /* Successful zbud_reclaim_page() calls made from zcache_store_page() */
 static u64 zcache_evict_filepages;
 /* Pages rejected by zcache_store_page() because !PageWasActive() */
 static u64 zcache_inactive_pages_refused;
 /* Failed zbud_reclaim_page() calls made from zcache_store_page() */
 static u64 zcache_reclaim_fail;
 /* Shrinker passes that decided there was something to reclaim */
 static u64 zcache_pool_shrink;
 /* Failed zbud_reclaim_page() calls made from the shrinker */
 static u64 zcache_pool_shrink_fail;
 /* Pages freed by the shrinker */
 static u64 zcache_pool_shrink_pages;
 /* zcache_store_zaddr() failures seen by zcache_store_page() */
 static u64 zcache_store_failed;
 /* Compressed (non-zero) pages currently stored */
 static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
 /* All-zero pages currently represented by ZERO_HANDLE */
 static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);

 /*
  * Allocation flags used for rbnodes and zbud allocations.
  * NOTE(review): __GFP_ZERO presumably matters for zcache_evict_zpage(),
  * which treats a NULL zhandle->zpool as "handle not initialised yet" --
  * confirm before dropping it.
  */
 #define GFP_ZCACHE \
 	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
 		__GFP_NOMEMALLOC | __GFP_NO_KSWAPD | __GFP_ZERO)

 /*
  * Sentinel stored in the radix tree for a page that contains only zeroes;
  * no zbud allocation backs it. The value has only the most significant bit
  * set.
  *
  * Make sure this is different from radix tree
  * indirect ptr or exceptional entry.
  */
 #define ZERO_HANDLE	((void *)~(~0UL >> 1))

 /*
  * Zcache receives pages for compression through the Cleancache API and is able
  * to evict pages from its own compressed pool on an LRU basis in the case that
  * the compressed pool is full.
  *
  * Zcache makes use of zbud for managing the compressed memory pool. Each
  * allocation in zbud is not directly accessible by address.  Rather, a handle
  * (zaddr) is returned by the allocation routine and that handle (zaddr) must
  * be mapped before being accessed. The compressed memory pool grows on demand
  * and shrinks as compressed pages are freed.
  *
  * When a file page is passed from cleancache to zcache, zcache maintains a
  * mapping of the <filesystem_type, inode_number, page_index> to the zbud
  * address that references that compressed file page. This mapping is achieved
  * with a red-black tree per filesystem type, plus a radix tree per red-black
  * node.
  *
  * A zcache pool with pool_id as the index is created when a filesystem is
  * mounted. Each zcache pool has a red-black tree, with the inode number
  * (rb_index) as the search key. Each red-black tree node has a radix tree
  * which uses page->index (ra_index) as the index. Each radix tree slot points
  * to the zbud address combined with some extra information (zcache_ra_handle).
  */
 /* Number of slots in zcache.pools[], i.e. the limit on live pools */
 #define MAX_ZCACHE_POOLS 32
 /*
  * One zcache_pool per (cleancache aware) filesystem mount instance
  */
 struct zcache_pool {
 	/* Tree of struct zcache_rbnode, keyed by rb_index */
 	struct rb_root rbtree;
 	rwlock_t rb_lock;		/* Protects rbtree */
 	/* Last value read from zbud_get_pool_size(pool) */
 	u64 size;
 	struct zbud_pool *pool;         /* Zbud pool used */
 };

 /*
  * Manage all zcache pools
  *
  * A slot in pools[] is NULL when unused; the slot index is the pool id
  * handed back by zcache_create_pool().
  * NOTE(review): several readers in this file (e.g. zcache_pages(), the
  * store/load paths) index pools[] without taking pool_lock.
  */
 struct _zcache {
 	struct zcache_pool *pools[MAX_ZCACHE_POOLS];
 	u32 num_pools;			/* Current no. of zcache pools */
 	spinlock_t pool_lock;		/* Protects pools[] and num_pools */
 };
 /* The single global instance */
 struct _zcache zcache;

 /*
  * Red-black tree node; each node owns a radix tree of page indices.
  * Indexed by inode number.
  */
 struct zcache_rbnode {
 	struct rb_node rb_node;
 	/* Search key; callers pass key.u.ino, stored here as an int */
 	int rb_index;
 	struct radix_tree_root ratree; /* Page radix tree per inode rbtree */
 	spinlock_t ra_lock;		/* Protects radix tree */
 	/* One reference for tree membership plus one per active user */
 	struct kref refcount;
 };

 /*
  * Header placed at the start of every zbud allocation made by
  * zcache_store_page(); the compressed data follows it directly. The
  * radix-tree slot for page->index holds the zbud handle of that allocation.
  */
 struct zcache_ra_handle {
 	int rb_index;			/* Redblack tree index */
 	int ra_index;			/* Radix tree index */
 	int zlen;			/* Compressed page size */
 	struct zcache_pool *zpool;	/* Finding zcache_pool during evict */
 };

 u64 zcache_pages(void)
 {
 	int i;
 	u64 count = 0;

 	for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
 		count += zcache.pools[i]->size;

 	return count;
 }

 /* Slab cache that backs every struct zcache_rbnode */
 static struct kmem_cache *zcache_rbnode_cache;

 /* Create the rbnode slab cache. Returns 0 on success, 1 on failure. */
 static int zcache_rbnode_cache_create(void)
 {
 	zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
 	if (!zcache_rbnode_cache)
 		return 1;

 	return 0;
 }

 /* Destroy the rbnode slab cache. */
 static void zcache_rbnode_cache_destroy(void)
 {
 	kmem_cache_destroy(zcache_rbnode_cache);
 }

 /*
  * Shrinker ->count_objects callback.
  *
  * Reports by how much the zcache pool size exceeds the number of active
  * file pages, or 0 when it does not exceed it. Neither argument is used.
  */
 static unsigned long zcache_count(struct shrinker *s,
 				  struct shrink_control *sc)
 {
 	unsigned long nr_active = global_page_state(NR_ACTIVE_FILE);
 	long surplus = zcache_pages() - nr_active;

 	return surplus > 0 ? surplus : 0;
 }

 /*
  * Shrinker ->scan_objects callback: shrink the zcache pools.
  *
  * The reclaim target (file_gap) is chosen as follows:
  *  - the whole pool, when the pool is at least as large as NR_FILE_PAGES
  *    and NR_FILE_PAGES is below zcache_clear_percent of totalram_pages;
  *  - otherwise the amount by which the pool exceeds NR_ACTIVE_FILE
  *    (nothing when it does not).
  *
  * Pools are then visited round-robin and one zbud page is reclaimed per
  * step until the target is met, as many reclaim attempts as the initial
  * target have failed, or a complete pass over pools[] finds nothing to
  * reclaim from. The last condition is needed because pool sizes can drop
  * to zero while the loop runs; without it a loop over NULL/empty slots
  * would never terminate, as skipping a slot consumes neither the target
  * nor the retry budget.
  *
  * @s and @sc are unused; in particular sc->nr_to_scan is not honoured.
  *
  * Returns the number of pages freed; 0 when another invocation is already
  * running.
  */
 static unsigned long zcache_scan(struct shrinker *s, struct shrink_control *sc)
 {
 	unsigned long active_file;
 	unsigned long file;
 	long file_gap;
 	unsigned long freed = 0;
 	unsigned long pool;
 	/*
 	 * Best-effort guard against concurrent scans.
 	 * NOTE(review): this is a plain bool, not an atomic test-and-set.
 	 */
 	static bool running;
 	unsigned int i = 0;
 	unsigned int idle = 0;	/* consecutive slots with nothing to reclaim */
 	int retries;

 	if (running)
 		goto end;

 	running = true;
 	active_file = global_page_state(NR_ACTIVE_FILE);
 	file = global_page_state(NR_FILE_PAGES);
 	pool = zcache_pages();

 	file_gap = pool - file;

 	if ((file_gap >= 0) &&
 		(totalram_pages * zcache_clear_percent / 100 > file)) {
 		/* Very few file pages are left: try to drop the whole pool */
 		file_gap = pool;
 		zcache_pool_shrink++;
 		goto reclaim;
 	}

 	/*
 	 * file_gap == 0 means that the number of pages
 	 * stored by zcache is around twice as many as the
 	 * number of active file pages.
 	 */
 	file_gap = pool - active_file;
 	if (file_gap < 0)
 		file_gap = 0;
 	else
 		zcache_pool_shrink++;

 reclaim:
 	retries = file_gap;
 	while ((file_gap > 0) && retries) {
 		struct zcache_pool *zpool = zcache.pools[i];

 		/* Keep the index bounded instead of letting it grow forever */
 		i = (i + 1) % MAX_ZCACHE_POOLS;

 		if (!zpool || !zpool->size) {
 			/* A whole pass without a usable pool: give up */
 			if (++idle >= MAX_ZCACHE_POOLS)
 				break;
 			continue;
 		}
 		idle = 0;

 		if (zbud_reclaim_page(zpool->pool, 8)) {
 			zcache_pool_shrink_fail++;
 			retries--;
 			continue;
 		}
 		freed++;
 		file_gap--;
 	}

 	zcache_pool_shrink_pages += freed;

 	/* Refresh the cached size of every live pool, skipping empty slots */
 	for (i = 0; i < MAX_ZCACHE_POOLS; i++) {
 		struct zcache_pool *zpool = zcache.pools[i];

 		if (zpool)
 			zpool->size = zbud_get_pool_size(zpool->pool);
 	}

 	running = false;
 end:
 	return freed;
 }

 /*
  * Shrinker registered by init_zcache(); .seeks is set to sixteen times
  * DEFAULT_SEEKS.
  */
 static struct shrinker zcache_shrinker = {
 	.scan_objects = zcache_scan,
 	.count_objects = zcache_count,
 	.seeks = DEFAULT_SEEKS * 16
 };

 /*
  * Compression functions
  * (The functions below are copied from zswap!)
  */
 /* One compressor transform per CPU, installed by __zcache_cpu_notifier() */
 static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;

 /* Operation selector for zcache_comp_op() */
 enum comp_op {
 	ZCACHE_COMPOP_COMPRESS,
 	ZCACHE_COMPOP_DECOMPRESS
 };

 /*
  * Run one (de)compression on the current CPU's transform.
  *
  * @op:   ZCACHE_COMPOP_COMPRESS or ZCACHE_COMPOP_DECOMPRESS
  * @src:  input buffer of @slen bytes
  * @dst:  output buffer
  * @dlen: passed through to the crypto call unchanged
  *
  * The CPU is pinned with get_cpu()/put_cpu() for the duration of the call.
  * Returns the crypto layer's result, or -EINVAL for an unknown @op.
  */
 static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
 				u8 *dst, unsigned int *dlen)
 {
 	struct crypto_comp *tfm;
 	int ret = -EINVAL;

 	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());

 	if (op == ZCACHE_COMPOP_COMPRESS)
 		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
 	else if (op == ZCACHE_COMPOP_DECOMPRESS)
 		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);

 	put_cpu();
 	return ret;
 }

 /*
  * Pick the compressor and allocate the per-cpu transform pointers.
  *
  * Falls back to ZCACHE_COMPRESSOR_DEFAULT when the configured compressor is
  * not available.
  *
  * Returns 0 on success, -ENODEV when not even the default compressor is
  * available, -ENOMEM when the per-cpu allocation fails.
  */
 static int __init zcache_comp_init(void)
 {
 	bool available = crypto_has_comp(zcache_compressor, 0, 0);

 	if (!available) {
 		pr_info("%s compressor not available\n", zcache_compressor);
 		/* retry with the built-in default */
 		zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
 		available = crypto_has_comp(zcache_compressor, 0, 0);
 	}
 	if (!available)
 		return -ENODEV;

 	pr_info("using %s compressor\n", zcache_compressor);

 	/* one transform pointer per possible CPU */
 	zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
 	return zcache_comp_pcpu_tfms ? 0 : -ENOMEM;
 }

 /* Release the per-cpu transform pointer array, if it was allocated. */
 static void zcache_comp_exit(void)
 {
 	if (!zcache_comp_pcpu_tfms)
 		return;

 	free_percpu(zcache_comp_pcpu_tfms);
 }

 /*
  * Per-cpu code
  * (The functions below are also copied from zswap!)
  */
 /*
  * Per-cpu scratch buffer of 2 * PAGE_SIZE bytes (allocated in
  * __zcache_cpu_notifier()) that receives the compressor output in
  * zcache_store_page().
  */
 static DEFINE_PER_CPU(u8 *, zcache_dstmem);

 /*
  * Allocate the compressor transform and the scratch buffer of @cpu.
  * Returns NOTIFY_OK on success and NOTIFY_BAD when either allocation fails.
  */
 static int zcache_cpu_res_alloc(unsigned long cpu)
 {
 	struct crypto_comp *tfm;
 	u8 *buf;

 	tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
 	if (IS_ERR(tfm)) {
 		pr_err("can't allocate compressor transform\n");
 		return NOTIFY_BAD;
 	}
 	*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;

 	buf = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
 	if (!buf) {
 		pr_err("can't allocate compressor buffer\n");
 		crypto_free_comp(tfm);
 		*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
 		return NOTIFY_BAD;
 	}
 	per_cpu(zcache_dstmem, cpu) = buf;

 	return NOTIFY_OK;
 }

 /* Free whatever zcache_cpu_res_alloc() set up for @cpu. */
 static void zcache_cpu_res_free(unsigned long cpu)
 {
 	struct crypto_comp *tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);

 	if (tfm) {
 		crypto_free_comp(tfm);
 		*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
 	}
 	kfree(per_cpu(zcache_dstmem, cpu));
 	per_cpu(zcache_dstmem, cpu) = NULL;
 }

 /*
  * CPU hotplug worker.
  *
  * CPU_UP_PREPARE sets up the per-cpu compression resources of @cpu;
  * CPU_DEAD and CPU_UP_CANCELED release them. Every other action is ignored.
  *
  * Returns NOTIFY_BAD when setting up the resources fails, NOTIFY_OK
  * otherwise.
  */
 static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
 {
 	if (action == CPU_UP_PREPARE)
 		return zcache_cpu_res_alloc(cpu);

 	if (action == CPU_DEAD || action == CPU_UP_CANCELED)
 		zcache_cpu_res_free(cpu);

 	return NOTIFY_OK;
 }

 /*
  * notifier_block callback: unpacks the CPU number carried in @pcpu and
  * forwards to __zcache_cpu_notifier(). @nb is unused.
  */
 static int zcache_cpu_notifier(struct notifier_block *nb,
 				unsigned long action, void *pcpu)
 {
 	unsigned long cpu = (unsigned long)pcpu;

 	return __zcache_cpu_notifier(action, cpu);
 }

 /* Registered with register_cpu_notifier() in zcache_cpu_init() */
 static struct notifier_block zcache_cpu_notifier_block = {
 	.notifier_call = zcache_cpu_notifier
 };

 static int zcache_cpu_init(void)
 {
 	unsigned long cpu;

 	get_online_cpus();
 	for_each_online_cpu(cpu)
 		if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
 			goto cleanup;
 	register_cpu_notifier(&zcache_cpu_notifier_block);
 	put_online_cpus();
 	return 0;

 cleanup:
 	for_each_online_cpu(cpu)
 		__zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
 	put_online_cpus();
 	return -ENOMEM;
 }

 /*
  * Zcache helpers
  */
 static bool zcache_is_full(void)
 {
 	long file = global_page_state(NR_FILE_PAGES);

 	return ((totalram_pages * zcache_max_pool_percent / 100 <
 			zcache_pages()) ||
 			(totalram_pages * zcache_clear_percent / 100 >
 			file));
 }

 /*
  * Look up the rbnode whose rb_index equals @index.
  *
  * @rbtree:    tree to search
  * @index:     key to look for
  * @rb_parent: optional; on a miss, receives the node under which a new
  *             node with this key would be linked (NULL for an empty tree)
  * @rb_link:   optional; on a miss, receives the link slot to pass to
  *             rb_link_node()
  *
  * @rb_parent and @rb_link are written only when the key is NOT found.
  *
  * Returns the matching node, or NULL when there is none.
  *
  * The caller must hold zpool->rb_lock at least
  */
 static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
 	int index, struct rb_node **rb_parent, struct rb_node ***rb_link)
 {
 	struct rb_node **link = &rbtree->rb_node;
 	struct rb_node *parent = NULL;

 	while (*link) {
 		struct zcache_rbnode *entry;

 		parent = *link;
 		entry = rb_entry(parent, struct zcache_rbnode, rb_node);
 		if (entry->rb_index > index)
 			link = &parent->rb_left;
 		else if (entry->rb_index < index)
 			link = &parent->rb_right;
 		else
 			return entry;
 	}

 	if (rb_parent)
 		*rb_parent = parent;
 	if (rb_link)
 		*rb_link = link;
 	return NULL;
 }

 static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
 					int rb_index)
 {
 	unsigned long flags;
 	struct zcache_rbnode *rbnode;

 	read_lock_irqsave(&zpool->rb_lock, flags);
 	rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, 0, 0);
 	if (rbnode)
 		kref_get(&rbnode->refcount);
 	read_unlock_irqrestore(&zpool->rb_lock, flags);
 	return rbnode;
 }

 /*
  * kref release callback for struct zcache_rbnode.
  *
  * Frees the node back to its slab cache; hits BUG_ON() if the node's radix
  * tree still has a root. The node must already have been removed from the
  * rbtree.
  */
 static void zcache_rbnode_release(struct kref *kref)
 {
 	struct zcache_rbnode *node =
 		container_of(kref, struct zcache_rbnode, refcount);

 	BUG_ON(node->ratree.rnode);
 	kmem_cache_free(zcache_rbnode_cache, node);
 }

 /*
  * Report whether the radix tree of @rbnode holds no entries (its root
  * pointer is NULL). An empty node is a candidate for removal from
  * zcache_pool->rbtree.
  *
  * Returns 1 when empty, 0 otherwise.
  *
  * Caller must hold zcache_rbnode->ra_lock
  */
 static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
 {
 	return !rbnode->ratree.rnode;
 }

 /*
  * Remove zcache_rbnode from zpool->rbtree, but only if nobody else is
  * using it.
  *
  * @zpool:         pool that owns the rbtree
  * @rbnode:        node to remove; the caller must hold a reference
  * @holded_rblock: true when the caller already holds zpool->rb_lock for
  *                 writing; when false the lock is taken and released here
  *
  * On removal the reference held on behalf of the tree is dropped. Because
  * the caller still holds its own reference, the node is not freed here.
  */
 static void zcache_rbnode_isolate(struct zcache_pool *zpool,
 		struct zcache_rbnode *rbnode, bool holded_rblock)
 {
 	unsigned long flags;

 	if (!holded_rblock)
 		write_lock_irqsave(&zpool->rb_lock, flags);
 	/*
 	 * Someone can get reference on this rbnode before we could
 	 * acquire write lock above.
 	 * We want to remove it from zpool->rbtree when only the caller and
 	 * corresponding ratree holds a reference to this rbnode.
 	 * Below check ensures that a racing zcache put will not end up adding
 	 * a page to an isolated node and thereby losing that memory.
 	 */
 	if (atomic_read(&rbnode->refcount.refcount) == 2) {
 		rb_erase(&rbnode->rb_node, &zpool->rbtree);
 		RB_CLEAR_NODE(&rbnode->rb_node);
 		/* drops the count from 2 to 1, so release is not called here */
 		kref_put(&rbnode->refcount, zcache_rbnode_release);
 	}
 	if (!holded_rblock)
 		write_unlock_irqrestore(&zpool->rb_lock, flags);
 }

 /*
  * Store a zaddr (a handle from zbud_alloc(), or ZERO_HANDLE) in the
  * rbtree/radix-tree hierarchy.
  *
  * @zpool:    pool to store into
  * @ra_index: radix-tree key (page index)
  * @rb_index: rbtree key (inode number)
  * @zaddr:    value to store
  *
  * The rbnode for @rb_index is created when it does not exist yet. An entry
  * already present at @ra_index is removed and released first (a warning is
  * emitted and zcache_dup_entry is bumped).
  *
  * Returns 0 on success, -ENOMEM when no rbnode could be allocated, or the
  * error returned by radix_tree_insert(). On an insert failure the rbnode is
  * removed from the rbtree again if it ended up empty and unused.
  */
 static int zcache_store_zaddr(struct zcache_pool *zpool,
 		int ra_index, int rb_index, unsigned long zaddr)
 {
 	unsigned long flags;
 	struct zcache_rbnode *rbnode, *tmp;
 	struct rb_node **link = NULL, *parent = NULL;
 	int ret;
 	void *dup_zaddr;

 	rbnode = zcache_find_get_rbnode(zpool, rb_index);
 	if (!rbnode) {
 		/* alloc and init a new rbnode */
 		rbnode = kmem_cache_alloc(zcache_rbnode_cache,
 			GFP_ZCACHE);
 		if (!rbnode)
 			return -ENOMEM;

 		INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
 		spin_lock_init(&rbnode->ra_lock);
 		rbnode->rb_index = rb_index;
 		/* this initial reference belongs to the rbtree */
 		kref_init(&rbnode->refcount);
 		RB_CLEAR_NODE(&rbnode->rb_node);

 		/* add that rbnode to rbtree */
 		write_lock_irqsave(&zpool->rb_lock, flags);
 		tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
 				&parent, &link);
 		if (tmp) {
 			/* somebody else allocated new rbnode */
 			kmem_cache_free(zcache_rbnode_cache, rbnode);
 			rbnode = tmp;
 		} else {
 			rb_link_node(&rbnode->rb_node, parent, link);
 			rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
 		}

 		/* Take the caller's reference on this zcache_rbnode */
 		kref_get(&rbnode->refcount);
 		write_unlock_irqrestore(&zpool->rb_lock, flags);
 	}

 	/* Successfully got a zcache_rbnode when arriving here */
 	spin_lock_irqsave(&rbnode->ra_lock, flags);
 	dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
 	if (unlikely(dup_zaddr)) {
 		/*
 		 * WARN(1, ...) rather than WARN_ON("..."): the latter treats
 		 * the string as an always-true condition and never prints it.
 		 */
 		WARN(1, "duplicated, will be replaced!\n");
 		if (dup_zaddr == ZERO_HANDLE) {
 			atomic_dec(&zcache_stored_zero_pages);
 		} else {
 			zbud_free(zpool->pool, (unsigned long)dup_zaddr);
 			atomic_dec(&zcache_stored_pages);
 			zpool->size = zbud_get_pool_size(zpool->pool);
 		}
 		zcache_dup_entry++;
 	}

 	/* Insert the new zaddr into the radix tree */
 	ret = radix_tree_insert(&rbnode->ratree, ra_index,
 				(void *)zaddr);
 	spin_unlock_irqrestore(&rbnode->ra_lock, flags);
 	if (unlikely(ret)) {
 		/* rb_lock first, then ra_lock: same order as everywhere else */
 		write_lock_irqsave(&zpool->rb_lock, flags);
 		spin_lock(&rbnode->ra_lock);

 		if (zcache_rbnode_empty(rbnode))
 			zcache_rbnode_isolate(zpool, rbnode, 1);

 		spin_unlock(&rbnode->ra_lock);
 		write_unlock_irqrestore(&zpool->rb_lock, flags);
 	}

 	/* drop the caller's reference */
 	kref_put(&rbnode->refcount, zcache_rbnode_release);
 	return ret;
 }

 /*
  * Load zaddr and delete it from radix tree.
  * If the radix tree of the corresponding rbnode is empty, delete the rbnode
  * from zpool->rbtree also.
  *
  * @zpool:    pool to search
  * @rb_index: rbtree key (inode number)
  * @ra_index: radix-tree key (page index)
  *
  * Returns the removed entry (a zbud handle or ZERO_HANDLE), or NULL when
  * either the rbnode or the radix-tree entry does not exist. The caller
  * becomes responsible for releasing a returned zbud handle.
  */
 static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
 				int rb_index, int ra_index)
 {
 	struct zcache_rbnode *rbnode;
 	void *zaddr = NULL;
 	unsigned long flags;

 	/* takes a reference that is dropped at the end of this function */
 	rbnode = zcache_find_get_rbnode(zpool, rb_index);
 	if (!rbnode)
 		goto out;

 	BUG_ON(rbnode->rb_index != rb_index);

 	spin_lock_irqsave(&rbnode->ra_lock, flags);
 	zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
 	spin_unlock_irqrestore(&rbnode->ra_lock, flags);

 	/* rb_lock and ra_lock must be taken again in the given sequence */
 	write_lock_irqsave(&zpool->rb_lock, flags);
 	spin_lock(&rbnode->ra_lock);
 	if (zcache_rbnode_empty(rbnode))
 		zcache_rbnode_isolate(zpool, rbnode, 1);
 	spin_unlock(&rbnode->ra_lock);
 	write_unlock_irqrestore(&zpool->rb_lock, flags);

 	kref_put(&rbnode->refcount, zcache_rbnode_release);
 out:
 	return zaddr;
 }

 static bool zero_page(struct page *page)
 {
 	unsigned long *ptr = kmap_atomic(page);
 	int i;
 	bool ret = false;

 	for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
 		if (ptr[i])
 			goto out;
 	}
 	ret = true;
 out:
 	kunmap_atomic(ptr);
 	return ret;
 }

 /*
  * cleancache ->put_page: try to keep a compressed copy of @page.
  *
  * @pool_id: index into zcache.pools[]
  * @key:     file key; only key.u.ino is used
  * @index:   page index within the file
  * @page:    page to store
  *
  * Pages without PageWasActive() are refused. All-zero pages are recorded as
  * ZERO_HANDLE without any zbud allocation. Other pages are compressed into
  * the per-cpu buffer and copied into a zbud allocation laid out as
  * struct zcache_ra_handle followed by the compressed bytes.
  *
  * Every failure is silent towards the caller (void return); the relevant
  * statistics counter is incremented instead.
  *
  * NOTE(review): the zcache_ra_handle fields are filled in only after the
  * entry has been inserted into the tree and after zbud_unmap(). A
  * concurrent zcache_load_page() or zcache_flush_ratree() could presumably
  * read them before they are written -- confirm whether callers exclude
  * this.
  */
 static void zcache_store_page(int pool_id, struct cleancache_filekey key,
 		pgoff_t index, struct page *page)
 {
 	struct zcache_ra_handle *zhandle;
 	u8 *zpage, *src, *dst;
 	/* Address of zhandle + compressed data(zpage) */
 	unsigned long zaddr = 0;
 	unsigned int zlen = PAGE_SIZE;
 	bool zero = 0;
 	int ret;

 	struct zcache_pool *zpool = zcache.pools[pool_id];

 	/*
 	 * Zcache will be ineffective if the compressed memory pool is full with
 	 * compressed inactive file pages and most of them will never be used
 	 * again.
 	 * So we refuse to compress pages that are not from active file list.
 	 */
 	if (!PageWasActive(page)) {
 		zcache_inactive_pages_refused++;
 		return;
 	}

 	/* Zero pages need neither compression nor pool space */
 	zero = zero_page(page);
 	if (zero)
 		goto zero;

 	if (zcache_is_full()) {
 		zcache_pool_limit_hit++;
 		/* make room by evicting from this pool; give up on failure */
 		if (zbud_reclaim_page(zpool->pool, 8)) {
 			zcache_reclaim_fail++;
 			return;
 		}
 		/*
 		 * Continue if a page frame was reclaimed successfully.
 		 */
 		zcache_evict_filepages++;
 		zpool->size = zbud_get_pool_size(zpool->pool);
 	}

 	/* compress */
 	dst = get_cpu_var(zcache_dstmem);
 	src = kmap_atomic(page);
 	ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
 			&zlen);
 	kunmap_atomic(src);
 	if (ret) {
 		pr_err("zcache compress error ret %d\n", ret);
 		put_cpu_var(zcache_dstmem);
 		return;
 	}

 	/* store zcache handle together with compressed page data */
 	ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
 			GFP_ZCACHE, &zaddr);
 	if (ret) {
 		zcache_zbud_alloc_fail++;
 		put_cpu_var(zcache_dstmem);
 		return;
 	}

 	zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);

 	/* Compressed page data stored at the end of zcache_ra_handle */
 	zpage = (u8 *)(zhandle + 1);
 	memcpy(zpage, dst, zlen);
 	zbud_unmap(zpool->pool, zaddr);
 	put_cpu_var(zcache_dstmem);

 zero:
 	if (zero)
 		zaddr = (unsigned long)ZERO_HANDLE;

 	/* store zcache handle */
 	ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
 	if (ret) {
 		zcache_store_failed++;
 		/* a ZERO_HANDLE has no zbud allocation to give back */
 		if (!zero)
 			zbud_free(zpool->pool, zaddr);
 		return;
 	}

 	/* update stats */
 	if (zero) {
 		atomic_inc(&zcache_stored_zero_pages);
 	} else {
 		/* fill in the header that eviction and flushing rely on */
 		zhandle->ra_index = index;
 		zhandle->rb_index = key.u.ino;
 		zhandle->zlen = zlen;
 		zhandle->zpool = zpool;
 		atomic_inc(&zcache_stored_pages);
 		zpool->size = zbud_get_pool_size(zpool->pool);
 	}

 	return;
 }

 /*
  * cleancache ->get_page: fill @page from zcache, if it is stored there.
  *
  * @pool_id: index into zcache.pools[]
  * @key:     file key; only key.u.ino is used
  * @index:   page index within the file
  * @page:    destination page
  *
  * The entry is removed from zcache whether or not decompression works
  * (loads are destructive).
  *
  * Returns 0 when @page has been filled, -ENOENT when zcache has no entry
  * for it, and -EIO when the stored data could not be decompressed into
  * exactly one page. The -EIO case also emits a one-time warning; it used to
  * be a BUG_ON(), but a non-zero return is sufficient here and does not
  * bring the machine down.
  */
 static int zcache_load_page(int pool_id, struct cleancache_filekey key,
 			pgoff_t index, struct page *page)
 {
 	int ret;
 	u8 *src, *dst;
 	void *zaddr;
 	unsigned int dlen = PAGE_SIZE;
 	struct zcache_ra_handle *zhandle;
 	struct zcache_pool *zpool = zcache.pools[pool_id];

 	zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
 	if (!zaddr)
 		return -ENOENT;

 	if (zaddr == ZERO_HANDLE) {
 		/* An all-zero page: nothing to decompress, nothing to free */
 		dst = kmap_atomic(page);
 		memset(dst, 0, PAGE_SIZE);
 		kunmap_atomic(dst);
 		flush_dcache_page(page);
 		atomic_dec(&zcache_stored_zero_pages);
 		SetPageWasActive(page);
 		return 0;
 	}

 	zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
 			(unsigned long)zaddr);
 	/* Compressed page data stored at the end of zcache_ra_handle */
 	src = (u8 *)(zhandle + 1);

 	/* decompress */
 	dst = kmap_atomic(page);
 	ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
 			zhandle->zlen, dst, &dlen);
 	kunmap_atomic(dst);

 	/* The entry left the tree above, so release it in every case */
 	zbud_unmap(zpool->pool, (unsigned long)zaddr);
 	zbud_free(zpool->pool, (unsigned long)zaddr);

 	/* update stats */
 	atomic_dec(&zcache_stored_pages);
 	zpool->size = zbud_get_pool_size(zpool->pool);

 	if (WARN_ON_ONCE(ret || dlen != PAGE_SIZE))
 		return -EIO;

 	SetPageWasActive(page);
 	return 0;
 }

 /*
  * cleancache ->invalidate_page: drop the entry for one page, if present.
  *
  * @pool_id: index into zcache.pools[]
  * @key:     file key; only key.u.ino is used
  * @index:   page index within the file
  */
 static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
 			pgoff_t index)
 {
 	struct zcache_pool *zpool = zcache.pools[pool_id];
 	void *zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);

 	if (!zaddr)
 		return;

 	if (zaddr == ZERO_HANDLE) {
 		/* zero pages own no zbud memory */
 		atomic_dec(&zcache_stored_zero_pages);
 		return;
 	}

 	zbud_free(zpool->pool, (unsigned long)zaddr);
 	atomic_dec(&zcache_stored_pages);
 	zpool->size = zbud_get_pool_size(zpool->pool);
 }

 /* Number of radix-tree entries looked up per batch while flushing */
 #define FREE_BATCH 16
 /*
  * Remove and release every entry in the radix tree of @rbnode.
  *
  * @zpool:  pool the node belongs to (owner of the zbud allocations)
  * @rbnode: node whose radix tree is emptied
  *
  * Entries are deleted by the index that the gang lookup reported for them,
  * not by the ra_index stored inside the zbud allocation: the stored value
  * is an int and is written by zcache_store_page() only after the entry is
  * already visible in the tree. For the same reason no zbud mapping is
  * needed here.
  *
  * Callers must hold rbnode->ra_lock (the visible callers also hold
  * zpool->rb_lock for writing).
  */
 static void zcache_flush_ratree(struct zcache_pool *zpool,
 		struct zcache_rbnode *rbnode)
 {
 	unsigned long index = 0;
 	int count, i;

 	do {
 		void *zaddrs[FREE_BATCH];
 		unsigned long indices[FREE_BATCH];

 		count = radix_tree_gang_lookup_index(&rbnode->ratree,
 				(void **)zaddrs, indices,
 				index, FREE_BATCH);

 		for (i = 0; i < count; i++) {
 			void *zaddr;

 			/* remember how far we got for the next lookup */
 			index = indices[i];
 			zaddr = radix_tree_delete(&rbnode->ratree, index);
 			if (!zaddr)
 				continue;

 			if (zaddr == ZERO_HANDLE) {
 				atomic_dec(&zcache_stored_zero_pages);
 				continue;
 			}

 			zbud_free(zpool->pool, (unsigned long)zaddr);
 			atomic_dec(&zcache_stored_pages);
 			zpool->size = zbud_get_pool_size(zpool->pool);
 		}

 		/* resume right after the last index handled */
 		index++;
 	} while (count == FREE_BATCH);
 }

 /*
  * cleancache ->invalidate_inode: drop every stored page of one inode.
  *
  * @pool_id: index into zcache.pools[]
  * @key:     file key; only key.u.ino is used
  *
  * Does nothing when the inode has no rbnode. Otherwise the node's radix
  * tree is flushed and the node is removed from the rbtree, provided nobody
  * else holds a reference to it.
  */
 static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
 {
 	struct zcache_rbnode *rbnode;
 	unsigned long flags1, flags2;
 	struct zcache_pool *zpool = zcache.pools[pool_id];

 	/*
 	 * Refuse new pages being added to the same rbnode, so get rb_lock
 	 * first.
 	 */
 	write_lock_irqsave(&zpool->rb_lock, flags1);
 	rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, 0, 0);
 	if (!rbnode) {
 		write_unlock_irqrestore(&zpool->rb_lock, flags1);
 		return;
 	}

 	/* hold our own reference across the flush */
 	kref_get(&rbnode->refcount);
 	spin_lock_irqsave(&rbnode->ra_lock, flags2);

 	zcache_flush_ratree(zpool, rbnode);
 	if (zcache_rbnode_empty(rbnode))
 		/* When we arrive here, we already hold rb_lock */
 		zcache_rbnode_isolate(zpool, rbnode, 1);

 	spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
 	write_unlock_irqrestore(&zpool->rb_lock, flags1);
 	kref_put(&rbnode->refcount, zcache_rbnode_release);
 }

 static void zcache_destroy_pool(struct zcache_pool *zpool);
 static void zcache_flush_fs(int pool_id)
 {
 	struct zcache_rbnode *z_rbnode = NULL;
 	struct rb_node *rbnode;
 	unsigned long flags1, flags2;
 	struct zcache_pool *zpool;

 	if (pool_id < 0)
 		return;

 	zpool = zcache.pools[pool_id];
 	if (!zpool)
 		return;

 	/*
 	 * Refuse new pages added in, so get rb_lock at first.
 	 */
 	write_lock_irqsave(&zpool->rb_lock, flags1);

 	rbnode = rb_first(&zpool->rbtree);
 	while (rbnode) {
 		z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
 		rbnode = rb_next(rbnode);
 		if (z_rbnode) {
 			kref_get(&z_rbnode->refcount);
 			spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
 			zcache_flush_ratree(zpool, z_rbnode);
 			if (zcache_rbnode_empty(z_rbnode))
 				zcache_rbnode_isolate(zpool, z_rbnode, 1);
 			spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
 			kref_put(&z_rbnode->refcount, zcache_rbnode_release);
 		}
 	}

 	write_unlock_irqrestore(&zpool->rb_lock, flags1);
 	zcache_destroy_pool(zpool);
 }

 /*
  * zbud ->evict callback: evict one compressed page from a zcache pool.
  * zbud calls this from zbud_reclaim_page(), which this file uses once the
  * compressed pool is full and from the shrinker.
  *
  * @pool:  zbud pool the allocation belongs to
  * @zaddr: zbud handle of the allocation to evict
  *
  * The header at the start of the allocation tells which pool, inode and
  * page index the data belongs to. If the entry is still in the tree it is
  * removed and the allocation is freed; if it is already gone (somebody else
  * removed it first) nothing is freed here.
  *
  * Every return path balances the zbud_map() with a zbud_unmap().
  *
  * Returns 0, or -EINVAL when the header has not been filled in yet.
  */
 static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
 {
 	struct zcache_pool *zpool;
 	struct zcache_ra_handle *zhandle;
 	void *zaddr_intree;

 	BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);

 	zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);

 	zpool = zhandle->zpool;
 	/* There can be a race with zcache store */
 	if (!zpool) {
 		zbud_unmap(pool, zaddr);
 		return -EINVAL;
 	}

 	BUG_ON(pool != zpool->pool);

 	zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
 			zhandle->ra_index);
 	/* the header is not needed any more */
 	zbud_unmap(pool, zaddr);

 	if (zaddr_intree) {
 		BUG_ON((unsigned long)zaddr_intree != zaddr);
 		zbud_free(pool, zaddr);
 		atomic_dec(&zcache_stored_pages);
 		zpool->size = zbud_get_pool_size(pool);
 		zcache_evict_zpages++;
 	}
 	return 0;
 }

 /* Callbacks handed to zbud_create_pool() for every zcache pool */
 static struct zbud_ops zcache_zbud_ops = {
 	.evict = zcache_evict_zpage
 };

 /*
  * Create a zcache pool and publish it in zcache.pools[].
  *
  * The pool is fully initialised before it becomes visible in the table.
  * Whether there is room is decided by looking for a free slot rather than
  * by trusting num_pools, so pools[] can never be written out of bounds.
  *
  * Returns the pool id (the slot index, >= 0) on success, -ENOMEM when an
  * allocation fails, or -EPERM when all MAX_ZCACHE_POOLS slots are in use.
  */
 static int zcache_create_pool(void)
 {
 	int ret;
 	struct zcache_pool *zpool;

 	zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
 	if (!zpool)
 		return -ENOMEM;

 	zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
 	if (!zpool->pool) {
 		kfree(zpool);
 		return -ENOMEM;
 	}

 	rwlock_init(&zpool->rb_lock);
 	zpool->rbtree = RB_ROOT;

 	/* Add to pool list: the first free slot becomes the pool id */
 	spin_lock(&zcache.pool_lock);
 	for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
 		if (!zcache.pools[ret])
 			break;

 	if (ret == MAX_ZCACHE_POOLS) {
 		spin_unlock(&zcache.pool_lock);
 		pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS);
 		/* tear down outside of the spinlock */
 		zbud_destroy_pool(zpool->pool);
 		kfree(zpool);
 		return -EPERM;
 	}

 	zcache.pools[ret] = zpool;
 	zcache.num_pools++;
 	spin_unlock(&zcache.pool_lock);

 	pr_info("New pool created id:%d\n", ret);
 	return ret;
 }

 static void zcache_destroy_pool(struct zcache_pool *zpool)
 {
 	int i;

 	if (!zpool)
 		return;

 	spin_lock(&zcache.pool_lock);
 	zcache.num_pools--;
 	for (i = 0; i < MAX_ZCACHE_POOLS; i++)
 		if (zcache.pools[i] == zpool)
 			break;
 	zcache.pools[i] = NULL;
 	spin_unlock(&zcache.pool_lock);

 	if (!RB_EMPTY_ROOT(&zpool->rbtree))
 		WARN_ON("Memory leak detected. Freeing non-empty pool!\n");

 	zbud_destroy_pool(zpool->pool);
 	kfree(zpool);
 }

 /*
  * cleancache ->init_fs: create a pool for a newly mounted filesystem.
  *
  * @pagesize: page size of the filesystem; must equal PAGE_SIZE
  *
  * Returns the new pool id (>= 0), -EINVAL for an unsupported page size, or
  * -ENOMEM when the pool could not be created (whatever the reason).
  */
 static int zcache_init_fs(size_t pagesize)
 {
 	int ret;

 	if (pagesize != PAGE_SIZE) {
 		pr_info("Unsupported page size: %zu\n", pagesize);
 		return -EINVAL;
 	}

 	ret = zcache_create_pool();
 	if (ret < 0) {
 		pr_info("Failed to create new pool\n");
 		return -ENOMEM;
 	}

 	return ret;
 }

 /*
  * cleancache ->init_shared_fs: @uuid is ignored and a private pool is
  * created exactly as in zcache_init_fs().
  */
 static int zcache_init_shared_fs(char *uuid, size_t pagesize)
 {
 	/* shared pools are unsupported and map to private */
 	return zcache_init_fs(pagesize);
 }

 /* cleancache backend operations, registered in init_zcache() */
 static struct cleancache_ops zcache_ops = {
 	.put_page = zcache_store_page,
 	.get_page = zcache_load_page,
 	.invalidate_page = zcache_flush_page,
 	.invalidate_inode = zcache_flush_inode,
 	.invalidate_fs = zcache_flush_fs,
 	.init_shared_fs = zcache_init_shared_fs,
 	.init_fs = zcache_init_fs
 };

 /*
  * Debugfs functions
  */
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>

 /*
  * Getter behind the "pool_pages" debugfs file: stores zcache_pages() in
  * @val and returns 0. @_data is unused.
  */
 static int pool_pages_get(void *_data, u64 *val)
 {
 	*val = zcache_pages();
 	return 0;
 }

 /* Read-only attribute (no setter) */
 DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");

 /* The "zcache" debugfs directory; set by zcache_debugfs_init() */
 static struct dentry *zcache_debugfs_root;

 /*
  * Create the "zcache" debugfs directory and one read-only file per
  * statistic.
  *
  * Returns 0 on success, -ENODEV when debugfs is not initialised, -ENOMEM
  * when the directory cannot be created. Failures to create individual
  * files are not checked.
  */
 static int __init zcache_debugfs_init(void)
 {
 	struct dentry *root;

 	if (!debugfs_initialized())
 		return -ENODEV;

 	root = debugfs_create_dir("zcache", NULL);
 	zcache_debugfs_root = root;
 	if (!root)
 		return -ENOMEM;

 	/* 0444 == S_IRUGO: readable by everyone, writable by nobody */
 	debugfs_create_u64("pool_limit_hit", 0444, root,
 			&zcache_pool_limit_hit);
 	debugfs_create_u64("reject_alloc_fail", 0444, root,
 			&zcache_zbud_alloc_fail);
 	debugfs_create_u64("duplicate_entry", 0444, root,
 			&zcache_dup_entry);
 	debugfs_create_file("pool_pages", 0444, root, NULL,
 			&pool_page_fops);
 	debugfs_create_atomic_t("stored_pages", 0444, root,
 			&zcache_stored_pages);
 	debugfs_create_atomic_t("stored_zero_pages", 0444, root,
 			&zcache_stored_zero_pages);
 	debugfs_create_u64("evicted_zpages", 0444, root,
 			&zcache_evict_zpages);
 	debugfs_create_u64("evicted_filepages", 0444, root,
 			&zcache_evict_filepages);
 	debugfs_create_u64("reclaim_fail", 0444, root,
 			&zcache_reclaim_fail);
 	debugfs_create_u64("inactive_pages_refused", 0444, root,
 			&zcache_inactive_pages_refused);
 	debugfs_create_u64("pool_shrink_count", 0444, root,
 			&zcache_pool_shrink);
 	debugfs_create_u64("pool_shrink_fail", 0444, root,
 			&zcache_pool_shrink_fail);
 	debugfs_create_u64("pool_shrink_pages", 0444, root,
 			&zcache_pool_shrink_pages);
 	debugfs_create_u64("store_fail", 0444, root,
 			&zcache_store_failed);

 	return 0;
 }

 /*
  * Remove the "zcache" debugfs directory and everything below it.
  * NOTE(review): no caller is visible in this file (there is no
  * module_exit handler) -- confirm whether this is still needed.
  */
 static void __exit zcache_debugfs_exit(void)
 {
 	debugfs_remove_recursive(zcache_debugfs_root);
 }
 #else
 /* !CONFIG_DEBUG_FS: nothing to set up, always succeeds */
 static int __init zcache_debugfs_init(void)
 {
 	return 0;
 }
 /* !CONFIG_DEBUG_FS: nothing to tear down */
 static void __exit zcache_debugfs_exit(void)
 {
 }
 #endif

 /*
  * zcache init and exit
  */

 /*
  * Bring zcache up: rbnode slab cache, compressor, per-cpu resources,
  * cleancache ops, debugfs files and the shrinker, in that order.
  *
  * Does nothing and returns 0 when zcache is not enabled. A debugfs failure
  * only produces a warning.
  *
  * Returns 0 on success and -ENOMEM when any of the first three steps fails;
  * the steps completed before the failure are undone.
  */
 static int __init init_zcache(void)
 {
 	if (!zcache_enabled)
 		return 0;

 	pr_info("loading zcache..\n");

 	if (zcache_rbnode_cache_create()) {
 		pr_err("entry cache creation failed\n");
 		return -ENOMEM;
 	}

 	if (zcache_comp_init()) {
 		pr_err("compressor initialization failed\n");
 		goto err_destroy_cache;
 	}

 	if (zcache_cpu_init()) {
 		pr_err("per-cpu initialization failed\n");
 		goto err_comp_exit;
 	}

 	spin_lock_init(&zcache.pool_lock);
 	cleancache_register_ops(&zcache_ops);

 	if (zcache_debugfs_init())
 		pr_warn("debugfs initialization failed\n");

 	register_shrinker(&zcache_shrinker);
 	return 0;

 err_comp_exit:
 	zcache_comp_exit();
 err_destroy_cache:
 	zcache_rbnode_cache_destroy();
 	return -ENOMEM;
 }

 /* must be late so crypto has time to come up */
 late_initcall(init_zcache);

 /* Module metadata */
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>");
 MODULE_DESCRIPTION("Compressed cache for clean file pages");