Commit a195db2d authored by Joe Thornber's avatar Joe Thornber Committed by Mike Snitzer
Browse files

dm bio prison: switch to using a red black tree



Previously it was using a fixed sized hash table.  There are times
when very many concurrent cells are held (such as when processing a very
large discard).  When this happens the hash table performance becomes
very poor.

Signed-off-by: default avatarJoe Thornber <ejt@redhat.com>
Signed-off-by: default avatarMike Snitzer <snitzer@redhat.com>
parent 33096a78
Loading
Loading
Loading
Loading
+73 −99
Original line number Original line Diff line number Diff line
@@ -14,68 +14,38 @@


/*----------------------------------------------------------------*/
/*----------------------------------------------------------------*/


struct bucket {
#define MIN_CELLS 1024
	spinlock_t lock;
	struct hlist_head cells;
};


struct dm_bio_prison {
struct dm_bio_prison {
	spinlock_t lock;
	mempool_t *cell_pool;
	mempool_t *cell_pool;

	struct rb_root cells;
	unsigned nr_buckets;
	unsigned hash_mask;
	struct bucket *buckets;
};
};


/*----------------------------------------------------------------*/

static uint32_t calc_nr_buckets(unsigned nr_cells)
{
	uint32_t n = 128;

	nr_cells /= 4;
	nr_cells = min(nr_cells, 8192u);

	while (n < nr_cells)
		n <<= 1;

	return n;
}

static struct kmem_cache *_cell_cache;
static struct kmem_cache *_cell_cache;


static void init_bucket(struct bucket *b)
/*----------------------------------------------------------------*/
{
	spin_lock_init(&b->lock);
	INIT_HLIST_HEAD(&b->cells);
}


/*
/*
 * @nr_cells should be the number of cells you want in use _concurrently_.
 * @nr_cells should be the number of cells you want in use _concurrently_.
 * Don't confuse it with the number of distinct keys.
 * Don't confuse it with the number of distinct keys.
 */
 */
struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
struct dm_bio_prison *dm_bio_prison_create(void)
{
{
	unsigned i;
	struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
	size_t len = sizeof(struct dm_bio_prison) +
		(sizeof(struct bucket) * nr_buckets);
	struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);


	if (!prison)
	if (!prison)
		return NULL;
		return NULL;


	prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
	spin_lock_init(&prison->lock);

	prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
	if (!prison->cell_pool) {
	if (!prison->cell_pool) {
		kfree(prison);
		kfree(prison);
		return NULL;
		return NULL;
	}
	}


	prison->nr_buckets = nr_buckets;
	prison->cells = RB_ROOT;
	prison->hash_mask = nr_buckets - 1;
	prison->buckets = (struct bucket *) (prison + 1);
	for (i = 0; i < nr_buckets; i++)
		init_bucket(prison->buckets + i);


	return prison;
	return prison;
}
}
@@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
}
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);


static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
static void __setup_new_cell(struct dm_cell_key *key,
			     struct bio *holder,
			     struct dm_bio_prison_cell *cell)
{
{
	const unsigned long BIG_PRIME = 4294967291UL;
       memcpy(&cell->key, key, sizeof(cell->key));
	uint64_t hash = key->block * BIG_PRIME;
       cell->holder = holder;

       bio_list_init(&cell->bios);
	return (uint32_t) (hash & prison->hash_mask);
}
}


static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
static int cmp_keys(struct dm_cell_key *lhs,
		    struct dm_cell_key *rhs)
{
{
	       return (lhs->virtual == rhs->virtual) &&
	if (lhs->virtual < rhs->virtual)
		       (lhs->dev == rhs->dev) &&
		return -1;
		       (lhs->block == rhs->block);
}


static struct bucket *get_bucket(struct dm_bio_prison *prison,
	if (lhs->virtual > rhs->virtual)
				 struct dm_cell_key *key)
		return 1;
{
	return prison->buckets + hash_key(prison, key);
}


static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
	if (lhs->dev < rhs->dev)
						  struct dm_cell_key *key)
		return -1;
{
	struct dm_bio_prison_cell *cell;


	hlist_for_each_entry(cell, &b->cells, list)
	if (lhs->dev > rhs->dev)
		if (keys_equal(&cell->key, key))
		return 1;
			return cell;


	return NULL;
	if (lhs->block < rhs->block)
}
		return -1;


static void __setup_new_cell(struct bucket *b,
	if (lhs->block > rhs->block)
			     struct dm_cell_key *key,
		return 1;
			     struct bio *holder,

			     struct dm_bio_prison_cell *cell)
	return 0;
{
	memcpy(&cell->key, key, sizeof(cell->key));
	cell->holder = holder;
	bio_list_init(&cell->bios);
	hlist_add_head(&cell->list, &b->cells);
}
}


static int __bio_detain(struct bucket *b,
static int __bio_detain(struct dm_bio_prison *prison,
			struct dm_cell_key *key,
			struct dm_cell_key *key,
			struct bio *inmate,
			struct bio *inmate,
			struct dm_bio_prison_cell *cell_prealloc,
			struct dm_bio_prison_cell *cell_prealloc,
			struct dm_bio_prison_cell **cell_result)
			struct dm_bio_prison_cell **cell_result)
{
{
	struct dm_bio_prison_cell *cell;
	int r;
	struct rb_node **new = &prison->cells.rb_node, *parent = NULL;

	while (*new) {
		struct dm_bio_prison_cell *cell =
			container_of(*new, struct dm_bio_prison_cell, node);

		r = cmp_keys(key, &cell->key);


	cell = __search_bucket(b, key);
		parent = *new;
	if (cell) {
		if (r < 0)
			new = &((*new)->rb_left);
		else if (r > 0)
			new = &((*new)->rb_right);
		else {
			if (inmate)
			if (inmate)
				bio_list_add(&cell->bios, inmate);
				bio_list_add(&cell->bios, inmate);
			*cell_result = cell;
			*cell_result = cell;
			return 1;
			return 1;
		}
		}
	}


	__setup_new_cell(b, key, inmate, cell_prealloc);
	__setup_new_cell(key, inmate, cell_prealloc);
	*cell_result = cell_prealloc;
	*cell_result = cell_prealloc;

	rb_link_node(&cell_prealloc->node, parent, new);
	rb_insert_color(&cell_prealloc->node, &prison->cells);

	return 0;
	return 0;
}
}


@@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison,
{
{
	int r;
	int r;
	unsigned long flags;
	unsigned long flags;
	struct bucket *b = get_bucket(prison, key);


	spin_lock_irqsave(&b->lock, flags);
	spin_lock_irqsave(&prison->lock, flags);
	r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
	r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
	spin_unlock_irqrestore(&b->lock, flags);
	spin_unlock_irqrestore(&prison->lock, flags);


	return r;
	return r;
}
}
@@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell);
/*
/*
 * @inmates must have been initialised prior to this call
 * @inmates must have been initialised prior to this call
 */
 */
static void __cell_release(struct dm_bio_prison_cell *cell,
static void __cell_release(struct dm_bio_prison *prison,
			   struct dm_bio_prison_cell *cell,
			   struct bio_list *inmates)
			   struct bio_list *inmates)
{
{
	hlist_del(&cell->list);
	rb_erase(&cell->node, &prison->cells);


	if (inmates) {
	if (inmates) {
		if (cell->holder)
		if (cell->holder)
@@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison,
		     struct bio_list *bios)
		     struct bio_list *bios)
{
{
	unsigned long flags;
	unsigned long flags;
	struct bucket *b = get_bucket(prison, &cell->key);


	spin_lock_irqsave(&b->lock, flags);
	spin_lock_irqsave(&prison->lock, flags);
	__cell_release(cell, bios);
	__cell_release(prison, cell, bios);
	spin_unlock_irqrestore(&b->lock, flags);
	spin_unlock_irqrestore(&prison->lock, flags);
}
}
EXPORT_SYMBOL_GPL(dm_cell_release);
EXPORT_SYMBOL_GPL(dm_cell_release);


/*
/*
 * Sometimes we don't want the holder, just the additional bios.
 * Sometimes we don't want the holder, just the additional bios.
 */
 */
static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
static void __cell_release_no_holder(struct dm_bio_prison *prison,
				     struct dm_bio_prison_cell *cell,
				     struct bio_list *inmates)
				     struct bio_list *inmates)
{
{
	hlist_del(&cell->list);
	rb_erase(&cell->node, &prison->cells);
	bio_list_merge(inmates, &cell->bios);
	bio_list_merge(inmates, &cell->bios);
}
}


@@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
			       struct bio_list *inmates)
			       struct bio_list *inmates)
{
{
	unsigned long flags;
	unsigned long flags;
	struct bucket *b = get_bucket(prison, &cell->key);


	spin_lock_irqsave(&b->lock, flags);
	spin_lock_irqsave(&prison->lock, flags);
	__cell_release_no_holder(cell, inmates);
	__cell_release_no_holder(prison, cell, inmates);
	spin_unlock_irqrestore(&b->lock, flags);
	spin_unlock_irqrestore(&prison->lock, flags);
}
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);


+4 −3
Original line number Original line Diff line number Diff line
@@ -10,8 +10,8 @@
#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */
#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */


#include <linux/list.h>
#include <linux/bio.h>
#include <linux/bio.h>
#include <linux/rbtree.h>


/*----------------------------------------------------------------*/
/*----------------------------------------------------------------*/


@@ -35,13 +35,14 @@ struct dm_cell_key {
 * themselves.
 * themselves.
 */
 */
struct dm_bio_prison_cell {
struct dm_bio_prison_cell {
	struct hlist_node list;
	struct rb_node node;

	struct dm_cell_key key;
	struct dm_cell_key key;
	struct bio *holder;
	struct bio *holder;
	struct bio_list bios;
	struct bio_list bios;
};
};


struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells);
struct dm_bio_prison *dm_bio_prison_create(void);
void dm_bio_prison_destroy(struct dm_bio_prison *prison);
void dm_bio_prison_destroy(struct dm_bio_prison *prison);


/*
/*
+1 −2
Original line number Original line Diff line number Diff line
@@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)


/*----------------------------------------------------------------*/
/*----------------------------------------------------------------*/


#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10
#define MIGRATION_COUNT_WINDOW 10
@@ -2327,7 +2326,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
	INIT_DELAYED_WORK(&cache->waker, do_waker);
	INIT_DELAYED_WORK(&cache->waker, do_waker);
	cache->last_commit_jiffies = jiffies;
	cache->last_commit_jiffies = jiffies;


	cache->prison = dm_bio_prison_create(PRISON_CELLS);
	cache->prison = dm_bio_prison_create();
	if (!cache->prison) {
	if (!cache->prison) {
		*error = "could not create bio prison";
		*error = "could not create bio prison";
		goto bad;
		goto bad;
+1 −2
Original line number Original line Diff line number Diff line
@@ -25,7 +25,6 @@
 */
 */
#define ENDIO_HOOK_POOL_SIZE 1024
#define ENDIO_HOOK_POOL_SIZE 1024
#define MAPPING_POOL_SIZE 1024
#define MAPPING_POOL_SIZE 1024
#define PRISON_CELLS 1024
#define COMMIT_PERIOD HZ
#define COMMIT_PERIOD HZ
#define NO_SPACE_TIMEOUT_SECS 60
#define NO_SPACE_TIMEOUT_SECS 60


@@ -2193,7 +2192,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
		pool->sectors_per_block_shift = __ffs(block_size);
		pool->sectors_per_block_shift = __ffs(block_size);
	pool->low_water_blocks = 0;
	pool->low_water_blocks = 0;
	pool_features_init(&pool->pf);
	pool_features_init(&pool->pf);
	pool->prison = dm_bio_prison_create(PRISON_CELLS);
	pool->prison = dm_bio_prison_create();
	if (!pool->prison) {
	if (!pool->prison) {
		*error = "Error creating pool's bio prison";
		*error = "Error creating pool's bio prison";
		err_p = ERR_PTR(-ENOMEM);
		err_p = ERR_PTR(-ENOMEM);