1156 lines
29 KiB
C
1156 lines
29 KiB
C
/*
|
|
* linux/mm/zcache.c
|
|
*
|
|
* A cleancache backend for file pages compression.
|
|
* Concepts based on original zcache by Dan Magenheimer.
|
|
* Copyright (C) 2013 Bob Liu <bob.liu@xxxxxxxxxx>
|
|
*
|
|
* With zcache, active file pages can be compressed in memory during page
|
|
* reclaiming. When their data is needed again the I/O reading operation is
|
|
* avoided. This results in a significant performance gain under memory pressure
|
|
* for systems with many file pages.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/atomic.h>
|
|
#include <linux/cleancache.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/crypto.h>
|
|
#include <linux/page-flags.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/mm_types.h>
|
|
#include <linux/module.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/radix-tree.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/types.h>
|
|
#include <linux/zbud.h>
|
|
|
|
/*
|
|
* Enable/disable zcache (disabled by default)
|
|
*/
|
|
static bool zcache_enabled __read_mostly;
|
|
module_param_named(enabled, zcache_enabled, bool, 0);
|
|
|
|
/*
|
|
* Compressor to be used by zcache
|
|
*/
|
|
#define ZCACHE_COMPRESSOR_DEFAULT "lzo"
|
|
static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
|
|
module_param_named(compressor, zcache_compressor, charp, 0);
|
|
|
|
/*
|
|
* The maximum percentage of memory that the compressed pool can occupy.
|
|
*/
|
|
static unsigned int zcache_max_pool_percent = 10;
|
|
module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);
|
|
|
|
static unsigned int zcache_clear_percent = 4;
|
|
module_param_named(clear_percent, zcache_clear_percent, uint, 0644);
|
|
/*
|
|
* zcache statistics
|
|
*/
|
|
static u64 zcache_pool_limit_hit;
|
|
static u64 zcache_dup_entry;
|
|
static u64 zcache_zbud_alloc_fail;
|
|
static u64 zcache_evict_zpages;
|
|
static u64 zcache_evict_filepages;
|
|
static u64 zcache_inactive_pages_refused;
|
|
static u64 zcache_reclaim_fail;
|
|
static u64 zcache_pool_shrink;
|
|
static u64 zcache_pool_shrink_fail;
|
|
static u64 zcache_pool_shrink_pages;
|
|
static u64 zcache_store_failed;
|
|
static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
|
|
static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);
|
|
|
|
/* Allocation flags used for zcache's rbnode and zbud allocations. */
#define GFP_ZCACHE \
	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
	 __GFP_NOMEMALLOC | __GFP_NO_KSWAPD | __GFP_ZERO)
|
|
|
|
/*
 * Radix-tree value that stands for an all-zero page; no zbud allocation
 * backs it.  Only the most significant bit is set.  The value must stay
 * different from a radix tree indirect pointer or exceptional entry.
 */
#define ZERO_HANDLE ((void *)~(~0UL >> 1))
|
|
|
|
/*
|
|
* Zcache receives pages for compression through the Cleancache API and is able
|
|
* to evict pages from its own compressed pool on an LRU basis in the case that
|
|
* the compressed pool is full.
|
|
*
|
|
* Zcache makes use of zbud for managing the compressed memory pool. Each
* allocation in zbud is not directly accessible by address. Rather, a handle
* (zaddr) is returned by the allocation routine and that handle (zaddr) must
* be mapped before being accessed. The compressed memory pool grows on demand
* and shrinks as compressed pages are freed.
|
|
*
|
|
* When a file page is passed from cleancache to zcache, zcache maintains a
|
|
* mapping of the <filesystem_type, inode_number, page_index> to the zbud
|
|
* address that references that compressed file page. This mapping is achieved
|
|
* with a red-black tree per filesystem type, plus a radix tree per red-black
|
|
* node.
|
|
*
|
|
* A zcache pool, with pool_id as its index, is created when a filesystem is
* mounted.
|
|
* Each zcache pool has a red-black tree, the inode number(rb_index) is the
|
|
* search key. Each red-black tree node has a radix tree which use
|
|
* page->index(ra_index) as the index. Each radix tree slot points to the zbud
|
|
* address combining with some extra information(zcache_ra_handle).
|
|
*/
|
|
#define MAX_ZCACHE_POOLS 32
|
|
/*
|
|
* One zcache_pool per (cleancache aware) filesystem mount instance
|
|
*/
|
|
struct zcache_pool {
|
|
struct rb_root rbtree;
|
|
rwlock_t rb_lock; /* Protects rbtree */
|
|
u64 size;
|
|
struct zbud_pool *pool; /* Zbud pool used */
|
|
};
|
|
|
|
/*
|
|
* Manage all zcache pools
|
|
*/
|
|
struct _zcache {
|
|
struct zcache_pool *pools[MAX_ZCACHE_POOLS];
|
|
u32 num_pools; /* Current no. of zcache pools */
|
|
spinlock_t pool_lock; /* Protects pools[] and num_pools */
|
|
};
|
|
struct _zcache zcache;
|
|
|
|
/*
|
|
* Redblack tree node, each node has a page index radix-tree.
|
|
* Indexed by inode number.
|
|
*/
|
|
struct zcache_rbnode {
|
|
struct rb_node rb_node;
|
|
int rb_index;
|
|
struct radix_tree_root ratree; /* Page radix tree per inode rbtree */
|
|
spinlock_t ra_lock; /* Protects radix tree */
|
|
struct kref refcount;
|
|
};
|
|
|
|
/*
|
|
* Radix-tree leaf, indexed by page->index
|
|
*/
|
|
/*
 * Header placed at the start of every zbud allocation made by zcache; the
 * compressed page data follows it immediately.
 */
struct zcache_ra_handle {
	int rb_index;			/* Redblack tree index */
	int ra_index;			/* Radix tree index */
	int zlen;			/* Compressed page size */
	struct zcache_pool *zpool;	/* Finding zcache_pool during evict */
};
|
|
|
|
u64 zcache_pages(void)
|
|
{
|
|
int i;
|
|
u64 count = 0;
|
|
|
|
for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
|
|
count += zcache.pools[i]->size;
|
|
|
|
return count;
|
|
}
|
|
|
|
static struct kmem_cache *zcache_rbnode_cache;
|
|
static int zcache_rbnode_cache_create(void)
|
|
{
|
|
zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
|
|
return zcache_rbnode_cache == NULL;
|
|
}
|
|
static void zcache_rbnode_cache_destroy(void)
|
|
{
|
|
kmem_cache_destroy(zcache_rbnode_cache);
|
|
}
|
|
|
|
static unsigned long zcache_count(struct shrinker *s,
|
|
struct shrink_control *sc)
|
|
{
|
|
unsigned long active_file;
|
|
long file_gap;
|
|
|
|
active_file = global_page_state(NR_ACTIVE_FILE);
|
|
file_gap = zcache_pages() - active_file;
|
|
if (file_gap < 0)
|
|
file_gap = 0;
|
|
return file_gap;
|
|
}
|
|
|
|
static unsigned long zcache_scan(struct shrinker *s, struct shrink_control *sc)
|
|
{
|
|
unsigned long active_file;
|
|
unsigned long file;
|
|
long file_gap;
|
|
unsigned long freed = 0;
|
|
unsigned long pool;
|
|
static bool running;
|
|
int i = 0;
|
|
int retries;
|
|
|
|
if (running)
|
|
goto end;
|
|
|
|
running = true;
|
|
active_file = global_page_state(NR_ACTIVE_FILE);
|
|
file = global_page_state(NR_FILE_PAGES);
|
|
pool = zcache_pages();
|
|
|
|
file_gap = pool - file;
|
|
|
|
if ((file_gap >= 0) &&
|
|
(totalram_pages * zcache_clear_percent / 100 > file)) {
|
|
file_gap = pool;
|
|
zcache_pool_shrink++;
|
|
goto reclaim;
|
|
}
|
|
|
|
/*
|
|
* file_gap == 0 means that the number of pages
|
|
* stored by zcache is around twice as many as the
|
|
* number of active file pages.
|
|
*/
|
|
file_gap = pool - active_file;
|
|
if (file_gap < 0)
|
|
file_gap = 0;
|
|
else
|
|
zcache_pool_shrink++;
|
|
|
|
reclaim:
|
|
retries = file_gap;
|
|
while ((file_gap > 0) && retries) {
|
|
struct zcache_pool *zpool =
|
|
zcache.pools[i++ % MAX_ZCACHE_POOLS];
|
|
if (!zpool || !zpool->size)
|
|
continue;
|
|
if (zbud_reclaim_page(zpool->pool, 8)) {
|
|
zcache_pool_shrink_fail++;
|
|
retries--;
|
|
continue;
|
|
}
|
|
freed++;
|
|
file_gap--;
|
|
}
|
|
|
|
zcache_pool_shrink_pages += freed;
|
|
for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
|
|
zcache.pools[i]->size =
|
|
zbud_get_pool_size(zcache.pools[i]->pool);
|
|
|
|
running = false;
|
|
end:
|
|
return freed;
|
|
}
|
|
|
|
static struct shrinker zcache_shrinker = {
|
|
.scan_objects = zcache_scan,
|
|
.count_objects = zcache_count,
|
|
.seeks = DEFAULT_SEEKS * 16
|
|
};
|
|
|
|
/*
|
|
* Compression functions
|
|
* (The functions below are copied from zswap!)
|
|
*/
|
|
/* One compression transform pointer per CPU. */
static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;

/* Operation selector for zcache_comp_op(). */
enum comp_op {
	ZCACHE_COMPOP_COMPRESS,
	ZCACHE_COMPOP_DECOMPRESS
};
|
|
|
|
/*
 * Run a compression or decompression with the current CPU's transform.
 *
 * @op:   ZCACHE_COMPOP_COMPRESS or ZCACHE_COMPOP_DECOMPRESS
 * @src:  input buffer of @slen bytes
 * @dst:  output buffer
 * @dlen: passed through to the crypto call
 *
 * The CPU is pinned with get_cpu()/put_cpu() around the operation.  Returns
 * the crypto call's result, or -EINVAL for an unknown @op.
 */
static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret = -EINVAL;

	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());

	if (op == ZCACHE_COMPOP_COMPRESS)
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
	else if (op == ZCACHE_COMPOP_DECOMPRESS)
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);

	put_cpu();
	return ret;
}
|
|
|
|
/*
 * Select the compressor and allocate the per-cpu transform pointers.
 *
 * If the requested compressor is unavailable the default one is tried.
 * Returns 0 on success, -ENODEV when no compressor is usable, and -ENOMEM
 * when the per-cpu allocation fails.
 */
static int __init zcache_comp_init(void)
{
	if (!crypto_has_comp(zcache_compressor, 0, 0)) {
		pr_info("%s compressor not available\n", zcache_compressor);

		/* retry with the built-in default */
		zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
		if (!crypto_has_comp(zcache_compressor, 0, 0))
			return -ENODEV;	/* not even the default loads */
	}
	pr_info("using %s compressor\n", zcache_compressor);

	zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	return zcache_comp_pcpu_tfms ? 0 : -ENOMEM;
}
|
|
|
|
static void zcache_comp_exit(void)
|
|
{
|
|
/* free percpu transforms */
|
|
if (zcache_comp_pcpu_tfms)
|
|
free_percpu(zcache_comp_pcpu_tfms);
|
|
}
|
|
|
|
/*
|
|
* Per-cpu code
|
|
* (The functions below are also copied from zswap!)
|
|
*/
|
|
static DEFINE_PER_CPU(u8 *, zcache_dstmem);
|
|
|
|
static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
|
|
{
|
|
struct crypto_comp *tfm;
|
|
u8 *dst;
|
|
|
|
switch (action) {
|
|
case CPU_UP_PREPARE:
|
|
tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
|
|
if (IS_ERR(tfm)) {
|
|
pr_err("can't allocate compressor transform\n");
|
|
return NOTIFY_BAD;
|
|
}
|
|
*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
|
|
dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
|
|
if (!dst) {
|
|
pr_err("can't allocate compressor buffer\n");
|
|
crypto_free_comp(tfm);
|
|
*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
|
|
return NOTIFY_BAD;
|
|
}
|
|
per_cpu(zcache_dstmem, cpu) = dst;
|
|
break;
|
|
case CPU_DEAD:
|
|
case CPU_UP_CANCELED:
|
|
tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
|
|
if (tfm) {
|
|
crypto_free_comp(tfm);
|
|
*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
|
|
}
|
|
dst = per_cpu(zcache_dstmem, cpu);
|
|
kfree(dst);
|
|
per_cpu(zcache_dstmem, cpu) = NULL;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
/*
 * CPU notifier entry point: converts the notifier payload into a cpu number
 * and forwards to __zcache_cpu_notifier().
 */
static int zcache_cpu_notifier(struct notifier_block *nb,
			       unsigned long action, void *pcpu)
{
	return __zcache_cpu_notifier(action, (unsigned long)pcpu);
}
|
|
|
|
static struct notifier_block zcache_cpu_notifier_block = {
|
|
.notifier_call = zcache_cpu_notifier
|
|
};
|
|
|
|
static int zcache_cpu_init(void)
|
|
{
|
|
unsigned long cpu;
|
|
|
|
get_online_cpus();
|
|
for_each_online_cpu(cpu)
|
|
if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
|
|
goto cleanup;
|
|
register_cpu_notifier(&zcache_cpu_notifier_block);
|
|
put_online_cpus();
|
|
return 0;
|
|
|
|
cleanup:
|
|
for_each_online_cpu(cpu)
|
|
__zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
|
|
put_online_cpus();
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/*
|
|
* Zcache helpers
|
|
*/
|
|
static bool zcache_is_full(void)
|
|
{
|
|
long file = global_page_state(NR_FILE_PAGES);
|
|
|
|
return ((totalram_pages * zcache_max_pool_percent / 100 <
|
|
zcache_pages()) ||
|
|
(totalram_pages * zcache_clear_percent / 100 >
|
|
file));
|
|
}
|
|
|
|
/*
|
|
* The caller must hold zpool->rb_lock at least
|
|
*/
|
|
static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
|
|
int index, struct rb_node **rb_parent, struct rb_node ***rb_link)
|
|
{
|
|
struct zcache_rbnode *entry;
|
|
struct rb_node **__rb_link, *__rb_parent, *rb_prev;
|
|
|
|
__rb_link = &rbtree->rb_node;
|
|
rb_prev = __rb_parent = NULL;
|
|
|
|
while (*__rb_link) {
|
|
__rb_parent = *__rb_link;
|
|
entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node);
|
|
if (entry->rb_index > index)
|
|
__rb_link = &__rb_parent->rb_left;
|
|
else if (entry->rb_index < index) {
|
|
rb_prev = __rb_parent;
|
|
__rb_link = &__rb_parent->rb_right;
|
|
} else
|
|
return entry;
|
|
}
|
|
|
|
if (rb_parent)
|
|
*rb_parent = __rb_parent;
|
|
if (rb_link)
|
|
*rb_link = __rb_link;
|
|
return NULL;
|
|
}
|
|
|
|
static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
|
|
int rb_index)
|
|
{
|
|
unsigned long flags;
|
|
struct zcache_rbnode *rbnode;
|
|
|
|
read_lock_irqsave(&zpool->rb_lock, flags);
|
|
rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, 0, 0);
|
|
if (rbnode)
|
|
kref_get(&rbnode->refcount);
|
|
read_unlock_irqrestore(&zpool->rb_lock, flags);
|
|
return rbnode;
|
|
}
|
|
|
|
/*
|
|
* kref_put callback for zcache_rbnode.
|
|
*
|
|
* The rbnode must have been isolated from rbtree already.
|
|
*/
|
|
static void zcache_rbnode_release(struct kref *kref)
|
|
{
|
|
struct zcache_rbnode *rbnode;
|
|
|
|
rbnode = container_of(kref, struct zcache_rbnode, refcount);
|
|
BUG_ON(rbnode->ratree.rnode);
|
|
kmem_cache_free(zcache_rbnode_cache, rbnode);
|
|
}
|
|
|
|
/*
|
|
* Check whether the radix-tree of this rbnode is empty.
|
|
* If that's true, then we can delete this zcache_rbnode from
|
|
* zcache_pool->rbtree
|
|
*
|
|
* Caller must hold zcache_rbnode->ra_lock
|
|
*/
|
|
static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
|
|
{
|
|
return rbnode->ratree.rnode == NULL;
|
|
}
|
|
|
|
/*
|
|
* Remove zcache_rbnode from zpool->rbtree
|
|
*
|
|
* holded_rblock - whether the caller already holds zpool->rb_lock
|
|
*/
|
|
/*
 * Unlink @rbnode from zpool->rbtree and drop the reference that goes with
 * tree membership - but only while the reference count is exactly 2.
 *
 * The write side of zpool->rb_lock is taken here unless @holded_rblock says
 * the caller already owns it.
 */
static void zcache_rbnode_isolate(struct zcache_pool *zpool,
		struct zcache_rbnode *rbnode, bool holded_rblock)
{
	unsigned long flags;
	const bool take_lock = !holded_rblock;

	if (take_lock)
		write_lock_irqsave(&zpool->rb_lock, flags);

	/*
	 * Another path may have taken a reference before the write lock was
	 * obtained.  The node is removed only when the caller and the tree
	 * are the sole holders (count == 2); this keeps a racing zcache put
	 * from adding a page to an isolated node and losing that memory.
	 */
	if (atomic_read(&rbnode->refcount.refcount) != 2)
		goto unlock;

	rb_erase(&rbnode->rb_node, &zpool->rbtree);
	RB_CLEAR_NODE(&rbnode->rb_node);
	kref_put(&rbnode->refcount, zcache_rbnode_release);

unlock:
	if (take_lock)
		write_unlock_irqrestore(&zpool->rb_lock, flags);
}
|
|
|
|
/*
|
|
* Store zaddr which allocated by zbud_alloc() to the hierarchy rbtree-ratree.
|
|
*/
|
|
static int zcache_store_zaddr(struct zcache_pool *zpool,
|
|
int ra_index, int rb_index, unsigned long zaddr)
|
|
{
|
|
unsigned long flags;
|
|
struct zcache_rbnode *rbnode, *tmp;
|
|
struct rb_node **link = NULL, *parent = NULL;
|
|
int ret;
|
|
void *dup_zaddr;
|
|
|
|
rbnode = zcache_find_get_rbnode(zpool, rb_index);
|
|
if (!rbnode) {
|
|
/* alloc and init a new rbnode */
|
|
rbnode = kmem_cache_alloc(zcache_rbnode_cache,
|
|
GFP_ZCACHE);
|
|
if (!rbnode)
|
|
return -ENOMEM;
|
|
|
|
INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
|
|
spin_lock_init(&rbnode->ra_lock);
|
|
rbnode->rb_index = rb_index;
|
|
kref_init(&rbnode->refcount);
|
|
RB_CLEAR_NODE(&rbnode->rb_node);
|
|
|
|
/* add that rbnode to rbtree */
|
|
write_lock_irqsave(&zpool->rb_lock, flags);
|
|
tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
|
|
&parent, &link);
|
|
if (tmp) {
|
|
/* somebody else allocated new rbnode */
|
|
kmem_cache_free(zcache_rbnode_cache, rbnode);
|
|
rbnode = tmp;
|
|
} else {
|
|
rb_link_node(&rbnode->rb_node, parent, link);
|
|
rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
|
|
}
|
|
|
|
/* Inc the reference of this zcache_rbnode */
|
|
kref_get(&rbnode->refcount);
|
|
write_unlock_irqrestore(&zpool->rb_lock, flags);
|
|
}
|
|
|
|
/* Succfully got a zcache_rbnode when arriving here */
|
|
spin_lock_irqsave(&rbnode->ra_lock, flags);
|
|
dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
|
|
if (unlikely(dup_zaddr)) {
|
|
WARN_ON("duplicated, will be replaced!\n");
|
|
if (dup_zaddr == ZERO_HANDLE) {
|
|
atomic_dec(&zcache_stored_zero_pages);
|
|
} else {
|
|
zbud_free(zpool->pool, (unsigned long)dup_zaddr);
|
|
atomic_dec(&zcache_stored_pages);
|
|
zpool->size = zbud_get_pool_size(zpool->pool);
|
|
}
|
|
zcache_dup_entry++;
|
|
}
|
|
|
|
/* Insert zcache_ra_handle to ratree */
|
|
ret = radix_tree_insert(&rbnode->ratree, ra_index,
|
|
(void *)zaddr);
|
|
if (unlikely(ret))
|
|
if (zcache_rbnode_empty(rbnode))
|
|
zcache_rbnode_isolate(zpool, rbnode, 0);
|
|
spin_unlock_irqrestore(&rbnode->ra_lock, flags);
|
|
|
|
kref_put(&rbnode->refcount, zcache_rbnode_release);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Load zaddr and delete it from radix tree.
|
|
* If the radix tree of the corresponding rbnode is empty, delete the rbnode
|
|
* from zpool->rbtree also.
|
|
*/
|
|
static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
|
|
int rb_index, int ra_index)
|
|
{
|
|
struct zcache_rbnode *rbnode;
|
|
void *zaddr = NULL;
|
|
unsigned long flags;
|
|
|
|
rbnode = zcache_find_get_rbnode(zpool, rb_index);
|
|
if (!rbnode)
|
|
goto out;
|
|
|
|
BUG_ON(rbnode->rb_index != rb_index);
|
|
|
|
spin_lock_irqsave(&rbnode->ra_lock, flags);
|
|
zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
|
|
if (zcache_rbnode_empty(rbnode))
|
|
zcache_rbnode_isolate(zpool, rbnode, 0);
|
|
spin_unlock_irqrestore(&rbnode->ra_lock, flags);
|
|
|
|
kref_put(&rbnode->refcount, zcache_rbnode_release);
|
|
out:
|
|
return zaddr;
|
|
}
|
|
|
|
static bool zero_page(struct page *page)
|
|
{
|
|
unsigned long *ptr = kmap_atomic(page);
|
|
int i;
|
|
bool ret = false;
|
|
|
|
for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
|
|
if (ptr[i])
|
|
goto out;
|
|
}
|
|
ret = true;
|
|
out:
|
|
kunmap_atomic(ptr);
|
|
return ret;
|
|
}
|
|
|
|
static void zcache_store_page(int pool_id, struct cleancache_filekey key,
|
|
pgoff_t index, struct page *page)
|
|
{
|
|
struct zcache_ra_handle *zhandle;
|
|
u8 *zpage, *src, *dst;
|
|
/* Address of zhandle + compressed data(zpage) */
|
|
unsigned long zaddr = 0;
|
|
unsigned int zlen = PAGE_SIZE;
|
|
bool zero = 0;
|
|
int ret;
|
|
|
|
struct zcache_pool *zpool = zcache.pools[pool_id];
|
|
|
|
/*
|
|
* Zcache will be ineffective if the compressed memory pool is full with
|
|
* compressed inactive file pages and most of them will never be used
|
|
* again.
|
|
* So we refuse to compress pages that are not from active file list.
|
|
*/
|
|
if (!PageWasActive(page)) {
|
|
zcache_inactive_pages_refused++;
|
|
return;
|
|
}
|
|
|
|
zero = zero_page(page);
|
|
if (zero)
|
|
goto zero;
|
|
|
|
if (zcache_is_full()) {
|
|
zcache_pool_limit_hit++;
|
|
if (zbud_reclaim_page(zpool->pool, 8)) {
|
|
zcache_reclaim_fail++;
|
|
return;
|
|
}
|
|
/*
|
|
* Continue if reclaimed a page frame succ.
|
|
*/
|
|
zcache_evict_filepages++;
|
|
zpool->size = zbud_get_pool_size(zpool->pool);
|
|
}
|
|
|
|
/* compress */
|
|
dst = get_cpu_var(zcache_dstmem);
|
|
src = kmap_atomic(page);
|
|
ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
|
|
&zlen);
|
|
kunmap_atomic(src);
|
|
if (ret) {
|
|
pr_err("zcache compress error ret %d\n", ret);
|
|
put_cpu_var(zcache_dstmem);
|
|
return;
|
|
}
|
|
|
|
/* store zcache handle together with compressed page data */
|
|
ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
|
|
GFP_ZCACHE, &zaddr);
|
|
if (ret) {
|
|
zcache_zbud_alloc_fail++;
|
|
put_cpu_var(zcache_dstmem);
|
|
return;
|
|
}
|
|
|
|
zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);
|
|
|
|
/* Compressed page data stored at the end of zcache_ra_handle */
|
|
zpage = (u8 *)(zhandle + 1);
|
|
memcpy(zpage, dst, zlen);
|
|
zbud_unmap(zpool->pool, zaddr);
|
|
put_cpu_var(zcache_dstmem);
|
|
|
|
zero:
|
|
if (zero)
|
|
zaddr = (unsigned long)ZERO_HANDLE;
|
|
|
|
/* store zcache handle */
|
|
ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
|
|
if (ret) {
|
|
zcache_store_failed++;
|
|
if (!zero)
|
|
zbud_free(zpool->pool, zaddr);
|
|
}
|
|
|
|
/* update stats */
|
|
if (zero) {
|
|
atomic_inc(&zcache_stored_zero_pages);
|
|
} else {
|
|
zhandle->ra_index = index;
|
|
zhandle->rb_index = key.u.ino;
|
|
zhandle->zlen = zlen;
|
|
zhandle->zpool = zpool;
|
|
atomic_inc(&zcache_stored_pages);
|
|
zpool->size = zbud_get_pool_size(zpool->pool);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
static int zcache_load_page(int pool_id, struct cleancache_filekey key,
|
|
pgoff_t index, struct page *page)
|
|
{
|
|
int ret = 0;
|
|
u8 *src, *dst;
|
|
void *zaddr;
|
|
unsigned int dlen = PAGE_SIZE;
|
|
struct zcache_ra_handle *zhandle;
|
|
struct zcache_pool *zpool = zcache.pools[pool_id];
|
|
|
|
zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
|
|
if (!zaddr)
|
|
return -ENOENT;
|
|
else if (zaddr == ZERO_HANDLE)
|
|
goto map;
|
|
|
|
zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
|
|
(unsigned long)zaddr);
|
|
/* Compressed page data stored at the end of zcache_ra_handle */
|
|
src = (u8 *)(zhandle + 1);
|
|
|
|
/* decompress */
|
|
map:
|
|
dst = kmap_atomic(page);
|
|
if (zaddr != ZERO_HANDLE) {
|
|
ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
|
|
zhandle->zlen, dst, &dlen);
|
|
} else {
|
|
memset(dst, 0, PAGE_SIZE);
|
|
kunmap_atomic(dst);
|
|
flush_dcache_page(page);
|
|
atomic_dec(&zcache_stored_zero_pages);
|
|
goto out;
|
|
}
|
|
kunmap_atomic(dst);
|
|
zbud_unmap(zpool->pool, (unsigned long)zaddr);
|
|
zbud_free(zpool->pool, (unsigned long)zaddr);
|
|
|
|
BUG_ON(ret);
|
|
BUG_ON(dlen != PAGE_SIZE);
|
|
|
|
/* update stats */
|
|
atomic_dec(&zcache_stored_pages);
|
|
zpool->size = zbud_get_pool_size(zpool->pool);
|
|
out:
|
|
SetPageWasActive(page);
|
|
return ret;
|
|
}
|
|
|
|
static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
|
|
pgoff_t index)
|
|
{
|
|
struct zcache_pool *zpool = zcache.pools[pool_id];
|
|
void *zaddr = NULL;
|
|
|
|
zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
|
|
if (zaddr && (zaddr != ZERO_HANDLE)) {
|
|
zbud_free(zpool->pool, (unsigned long)zaddr);
|
|
atomic_dec(&zcache_stored_pages);
|
|
zpool->size = zbud_get_pool_size(zpool->pool);
|
|
} else if (zaddr == ZERO_HANDLE) {
|
|
atomic_dec(&zcache_stored_zero_pages);
|
|
}
|
|
}
|
|
|
|
#define FREE_BATCH 16
|
|
/*
|
|
* Callers must hold the lock
|
|
*/
|
|
static void zcache_flush_ratree(struct zcache_pool *zpool,
|
|
struct zcache_rbnode *rbnode)
|
|
{
|
|
unsigned long index = 0;
|
|
int count, i;
|
|
struct zcache_ra_handle *zhandle;
|
|
void *zaddr = NULL;
|
|
|
|
do {
|
|
void *zaddrs[FREE_BATCH];
|
|
unsigned long indices[FREE_BATCH];
|
|
|
|
count = radix_tree_gang_lookup_index(&rbnode->ratree,
|
|
(void **)zaddrs, indices,
|
|
index, FREE_BATCH);
|
|
|
|
for (i = 0; i < count; i++) {
|
|
if (zaddrs[i] == ZERO_HANDLE) {
|
|
zaddr = radix_tree_delete(&rbnode->ratree,
|
|
indices[i]);
|
|
if (zaddr)
|
|
atomic_dec(&zcache_stored_zero_pages);
|
|
continue;
|
|
}
|
|
zhandle = (struct zcache_ra_handle *)zbud_map(
|
|
zpool->pool, (unsigned long)zaddrs[i]);
|
|
index = zhandle->ra_index;
|
|
zaddr = radix_tree_delete(&rbnode->ratree, index);
|
|
if (!zaddr)
|
|
continue;
|
|
zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]);
|
|
zbud_free(zpool->pool, (unsigned long)zaddrs[i]);
|
|
atomic_dec(&zcache_stored_pages);
|
|
zpool->size = zbud_get_pool_size(zpool->pool);
|
|
}
|
|
|
|
index++;
|
|
} while (count == FREE_BATCH);
|
|
}
|
|
|
|
static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
|
|
{
|
|
struct zcache_rbnode *rbnode;
|
|
unsigned long flags1, flags2;
|
|
struct zcache_pool *zpool = zcache.pools[pool_id];
|
|
|
|
/*
|
|
* Refuse new pages added in to the same rbinode, so get rb_lock at
|
|
* first.
|
|
*/
|
|
write_lock_irqsave(&zpool->rb_lock, flags1);
|
|
rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, 0, 0);
|
|
if (!rbnode) {
|
|
write_unlock_irqrestore(&zpool->rb_lock, flags1);
|
|
return;
|
|
}
|
|
|
|
kref_get(&rbnode->refcount);
|
|
spin_lock_irqsave(&rbnode->ra_lock, flags2);
|
|
|
|
zcache_flush_ratree(zpool, rbnode);
|
|
if (zcache_rbnode_empty(rbnode))
|
|
/* When arrvied here, we already hold rb_lock */
|
|
zcache_rbnode_isolate(zpool, rbnode, 1);
|
|
|
|
spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
|
|
write_unlock_irqrestore(&zpool->rb_lock, flags1);
|
|
kref_put(&rbnode->refcount, zcache_rbnode_release);
|
|
}
|
|
|
|
static void zcache_destroy_pool(struct zcache_pool *zpool);
|
|
static void zcache_flush_fs(int pool_id)
|
|
{
|
|
struct zcache_rbnode *z_rbnode = NULL;
|
|
struct rb_node *rbnode;
|
|
unsigned long flags1, flags2;
|
|
struct zcache_pool *zpool;
|
|
|
|
if (pool_id < 0)
|
|
return;
|
|
|
|
zpool = zcache.pools[pool_id];
|
|
if (!zpool)
|
|
return;
|
|
|
|
/*
|
|
* Refuse new pages added in, so get rb_lock at first.
|
|
*/
|
|
write_lock_irqsave(&zpool->rb_lock, flags1);
|
|
|
|
rbnode = rb_first(&zpool->rbtree);
|
|
while (rbnode) {
|
|
z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
|
|
rbnode = rb_next(rbnode);
|
|
if (z_rbnode) {
|
|
kref_get(&z_rbnode->refcount);
|
|
spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
|
|
zcache_flush_ratree(zpool, z_rbnode);
|
|
if (zcache_rbnode_empty(z_rbnode))
|
|
zcache_rbnode_isolate(zpool, z_rbnode, 1);
|
|
spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
|
|
kref_put(&z_rbnode->refcount, zcache_rbnode_release);
|
|
}
|
|
}
|
|
|
|
write_unlock_irqrestore(&zpool->rb_lock, flags1);
|
|
zcache_destroy_pool(zpool);
|
|
}
|
|
|
|
/*
|
|
* Evict compressed pages from zcache pool on an LRU basis after the compressed
|
|
* pool is full.
|
|
*/
|
|
static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
|
|
{
|
|
struct zcache_pool *zpool;
|
|
struct zcache_ra_handle *zhandle;
|
|
void *zaddr_intree;
|
|
|
|
BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);
|
|
|
|
zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);
|
|
|
|
zpool = zhandle->zpool;
|
|
/* There can be a race with zcache store */
|
|
if (!zpool)
|
|
return -EINVAL;
|
|
|
|
BUG_ON(pool != zpool->pool);
|
|
|
|
zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
|
|
zhandle->ra_index);
|
|
if (zaddr_intree) {
|
|
BUG_ON((unsigned long)zaddr_intree != zaddr);
|
|
zbud_unmap(pool, zaddr);
|
|
zbud_free(pool, zaddr);
|
|
atomic_dec(&zcache_stored_pages);
|
|
zpool->size = zbud_get_pool_size(pool);
|
|
zcache_evict_zpages++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static struct zbud_ops zcache_zbud_ops = {
|
|
.evict = zcache_evict_zpage
|
|
};
|
|
|
|
/* Return pool id */
|
|
static int zcache_create_pool(void)
|
|
{
|
|
int ret;
|
|
struct zcache_pool *zpool;
|
|
|
|
zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
|
|
if (!zpool) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
|
|
if (!zpool->pool) {
|
|
kfree(zpool);
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
spin_lock(&zcache.pool_lock);
|
|
if (zcache.num_pools == MAX_ZCACHE_POOLS) {
|
|
pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS);
|
|
zbud_destroy_pool(zpool->pool);
|
|
kfree(zpool);
|
|
ret = -EPERM;
|
|
goto out_unlock;
|
|
}
|
|
|
|
rwlock_init(&zpool->rb_lock);
|
|
zpool->rbtree = RB_ROOT;
|
|
/* Add to pool list */
|
|
for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
|
|
if (!zcache.pools[ret])
|
|
break;
|
|
zcache.pools[ret] = zpool;
|
|
zcache.num_pools++;
|
|
pr_info("New pool created id:%d\n", ret);
|
|
|
|
out_unlock:
|
|
spin_unlock(&zcache.pool_lock);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static void zcache_destroy_pool(struct zcache_pool *zpool)
|
|
{
|
|
int i;
|
|
|
|
if (!zpool)
|
|
return;
|
|
|
|
spin_lock(&zcache.pool_lock);
|
|
zcache.num_pools--;
|
|
for (i = 0; i < MAX_ZCACHE_POOLS; i++)
|
|
if (zcache.pools[i] == zpool)
|
|
break;
|
|
zcache.pools[i] = NULL;
|
|
spin_unlock(&zcache.pool_lock);
|
|
|
|
if (!RB_EMPTY_ROOT(&zpool->rbtree))
|
|
WARN_ON("Memory leak detected. Freeing non-empty pool!\n");
|
|
|
|
zbud_destroy_pool(zpool->pool);
|
|
kfree(zpool);
|
|
}
|
|
|
|
/*
 * cleancache init_fs hook: create a pool for a newly mounted filesystem.
 *
 * Returns the new pool id (>= 0), -EINVAL when @pagesize differs from
 * PAGE_SIZE, or the negative error reported by zcache_create_pool()
 * (-ENOMEM or -EPERM).
 */
static int zcache_init_fs(size_t pagesize)
{
	int ret;

	if (pagesize != PAGE_SIZE) {
		pr_info("Unsupported page size: %zu\n", pagesize);
		return -EINVAL;
	}

	ret = zcache_create_pool();
	if (ret < 0)
		pr_info("Failed to create new pool\n");

	return ret;
}
|
|
|
|
/*
 * cleancache init_shared_fs hook.  Shared pools are not supported: @uuid is
 * ignored and a private pool is created instead.
 */
static int zcache_init_shared_fs(char *uuid, size_t pagesize)
{
	int pool_id = zcache_init_fs(pagesize);

	return pool_id;
}
|
|
|
|
static struct cleancache_ops zcache_ops = {
|
|
.put_page = zcache_store_page,
|
|
.get_page = zcache_load_page,
|
|
.invalidate_page = zcache_flush_page,
|
|
.invalidate_inode = zcache_flush_inode,
|
|
.invalidate_fs = zcache_flush_fs,
|
|
.init_shared_fs = zcache_init_shared_fs,
|
|
.init_fs = zcache_init_fs
|
|
};
|
|
|
|
/*
|
|
* Debugfs functions
|
|
*/
|
|
#ifdef CONFIG_DEBUG_FS
|
|
#include <linux/debugfs.h>
|
|
|
|
static int pool_pages_get(void *_data, u64 *val)
|
|
{
|
|
*val = zcache_pages();
|
|
return 0;
|
|
}
|
|
|
|
DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");
|
|
|
|
static struct dentry *zcache_debugfs_root;
|
|
|
|
/*
 * Create the "zcache" debugfs directory and one read-only file per statistic.
 *
 * Returns -ENODEV when debugfs is not initialised, -ENOMEM when the directory
 * cannot be created, and 0 otherwise.  The results of the individual file
 * creations are not checked.
 */
static int __init zcache_debugfs_init(void)
{
	const umode_t mode = S_IRUGO;
	struct dentry *dir;

	if (!debugfs_initialized())
		return -ENODEV;

	dir = debugfs_create_dir("zcache", NULL);
	zcache_debugfs_root = dir;
	if (!dir)
		return -ENOMEM;

	debugfs_create_u64("pool_limit_hit", mode, dir,
			   &zcache_pool_limit_hit);
	debugfs_create_u64("reject_alloc_fail", mode, dir,
			   &zcache_zbud_alloc_fail);
	debugfs_create_u64("duplicate_entry", mode, dir,
			   &zcache_dup_entry);
	debugfs_create_file("pool_pages", mode, dir, NULL,
			    &pool_page_fops);
	debugfs_create_atomic_t("stored_pages", mode, dir,
				&zcache_stored_pages);
	debugfs_create_atomic_t("stored_zero_pages", mode, dir,
				&zcache_stored_zero_pages);
	debugfs_create_u64("evicted_zpages", mode, dir,
			   &zcache_evict_zpages);
	debugfs_create_u64("evicted_filepages", mode, dir,
			   &zcache_evict_filepages);
	debugfs_create_u64("reclaim_fail", mode, dir,
			   &zcache_reclaim_fail);
	debugfs_create_u64("inactive_pages_refused", mode, dir,
			   &zcache_inactive_pages_refused);
	debugfs_create_u64("pool_shrink_count", mode, dir,
			   &zcache_pool_shrink);
	debugfs_create_u64("pool_shrink_fail", mode, dir,
			   &zcache_pool_shrink_fail);
	debugfs_create_u64("pool_shrink_pages", mode, dir,
			   &zcache_pool_shrink_pages);
	debugfs_create_u64("store_fail", mode, dir,
			   &zcache_store_failed);
	return 0;
}
|
|
|
|
/* Remove the "zcache" debugfs directory and everything below it. */
static void __exit zcache_debugfs_exit(void)
{
	struct dentry *root = zcache_debugfs_root;

	debugfs_remove_recursive(root);
}
|
|
#else
|
|
static int __init zcache_debugfs_init(void)
|
|
{
|
|
return 0;
|
|
}
|
|
static void __exit zcache_debugfs_exit(void)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* zcache init and exit
|
|
*/
|
|
/*
 * Bring zcache up: rbnode slab cache, compressor, per-cpu state, cleancache
 * registration, debugfs and finally the shrinker.
 *
 * Does nothing and returns 0 unless the "enabled" parameter is set.  Any
 * failure before the cleancache registration unwinds the earlier steps and
 * returns -ENOMEM; a debugfs failure only produces a warning.
 */
static int __init init_zcache(void)
{
	if (!zcache_enabled)
		return 0;

	pr_info("loading zcache..\n");

	if (zcache_rbnode_cache_create()) {
		pr_err("entry cache creation failed\n");
		return -ENOMEM;
	}

	if (zcache_comp_init()) {
		pr_err("compressor initialization failed\n");
		goto err_destroy_cache;
	}

	if (zcache_cpu_init()) {
		pr_err("per-cpu initialization failed\n");
		goto err_comp_exit;
	}

	spin_lock_init(&zcache.pool_lock);
	cleancache_register_ops(&zcache_ops);

	if (zcache_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	register_shrinker(&zcache_shrinker);
	return 0;

err_comp_exit:
	zcache_comp_exit();
err_destroy_cache:
	zcache_rbnode_cache_destroy();
	return -ENOMEM;
}
|
|
|
|
/* must be late so crypto has time to come up */
|
|
late_initcall(init_zcache);
|
|
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>");
|
|
MODULE_DESCRIPTION("Compressed cache for clean file pages");
|
|
|