Commit b82096af authored by Mikulas Patocka, committed by Mike Snitzer
Browse files

dm ioctl: replace device hash with red-black tree



For high numbers of DM devices the 64-entry hash table has non-trivial
overhead. Fix this by replacing the hash table with a red-black tree.

Reported-by: Zdenek Kabelac <zkabelac@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 219a9b5e
Loading
Loading
Loading
Loading
+142 −112
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/dm-ioctl.h>
#include <linux/hdreg.h>
#include <linux/compat.h>
@@ -36,8 +37,10 @@ struct dm_file {
 * name or uuid.
 *---------------------------------------------------------------*/
struct hash_cell {
	struct list_head name_list;
	struct list_head uuid_list;
	struct rb_node name_node;
	struct rb_node uuid_node;
	bool name_set;
	bool uuid_set;

	char *name;
	char *uuid;
@@ -53,10 +56,8 @@ struct vers_iter {
};


/*
 * Number of buckets in each hash table. MASK_BUCKETS is NUM_BUCKETS - 1 and
 * is ANDed with a string hash to pick a bucket.
 */
#define NUM_BUCKETS 64
#define MASK_BUCKETS (NUM_BUCKETS - 1)
/* Bucket arrays indexed by the hash of a device name / uuid. */
static struct list_head _name_buckets[NUM_BUCKETS];
static struct list_head _uuid_buckets[NUM_BUCKETS];
/*
 * Roots of the red-black trees that hold cells ordered by name and by uuid.
 * NOTE(review): the bucket arrays and the tree roots appear together here;
 * per the commit message the trees replace the hash table -- confirm which
 * declarations survive in the tree.
 */
static struct rb_root name_rb_tree = RB_ROOT;
static struct rb_root uuid_rb_tree = RB_ROOT;

static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred);

@@ -70,73 +71,110 @@ static DECLARE_RWSEM(_hash_lock);
 */
static DEFINE_MUTEX(dm_hash_cells_mutex);

static void init_buckets(struct list_head *buckets)
{
	unsigned int i;

	for (i = 0; i < NUM_BUCKETS; i++)
		INIT_LIST_HEAD(buckets + i);
}

/*
 * Initialise the name and uuid bucket arrays, name array first.
 *
 * Always returns 0.
 */
static int dm_hash_init(void)
{
	struct list_head *const tables[] = { _name_buckets, _uuid_buckets };
	unsigned int t;

	for (t = 0; t < sizeof(tables) / sizeof(tables[0]); t++)
		init_buckets(tables[t]);

	return 0;
}

/*
 * Tear-down counterpart of the hash setup: calls dm_hash_remove_all() with
 * all three of its flags cleared.
 */
static void dm_hash_exit(void)
{
	const bool keep_open_devices = false;
	const bool mark_deferred = false;
	const bool only_deferred = false;

	dm_hash_remove_all(keep_open_devices, mark_deferred, only_deferred);
}

/*-----------------------------------------------------------------
 * Hash function:
 * Folds each character of a NUL-terminated string into a running
 * value (add, then multiply) and masks the result down to a bucket
 * index. Speed is not a concern since it's only used by the ioctl
 * interface.
 *---------------------------------------------------------------*/
static unsigned int hash_str(const char *str)
{
	const unsigned int hash_mult = 2654435387U;
	unsigned int h = 0;
	const char *p;

	for (p = str; *p; p++) {
		h += (unsigned int) *p;
		h *= hash_mult;
	}

	/* Keep only the low bits so the result indexes a bucket array. */
	return h & MASK_BUCKETS;
}

/*-----------------------------------------------------------------
 * Code for looking up a device by name
 *---------------------------------------------------------------*/
/*
 * Look up the cell whose name equals @str. On a match dm_get() is called on
 * the cell's mapped device before the cell is returned; NULL is returned
 * when no cell matches.
 *
 * NOTE(review): this span contains both a bucket-list lookup (hash_str() and
 * list_for_each_entry() over _name_buckets) and a walk of name_rb_tree. They
 * look like the removed and added sides of a diff whose +/- markers were
 * lost, so the text does not compile as shown -- confirm against the tree.
 */
static struct hash_cell *__get_name_cell(const char *str)
{
	struct hash_cell *hc;
	unsigned int h = hash_str(str);
	struct rb_node *n = name_rb_tree.rb_node;

	list_for_each_entry (hc, _name_buckets + h, name_list)
		if (!strcmp(hc->name, str)) {
	while (n) {
		struct hash_cell *hc = container_of(n, struct hash_cell, name_node);
		int c = strcmp(hc->name, str);
		if (!c) {
			dm_get(hc->md);
			return hc;
		}
		/* hc->name compares greater than str: go left, otherwise right. */
		n = c >= 0 ? n->rb_left : n->rb_right;
	}

	return NULL;
}

/*
 * Look up the cell whose uuid equals @str. On a match dm_get() is called on
 * the cell's mapped device before the cell is returned; NULL is returned
 * when no cell matches.
 *
 * NOTE(review): this span contains both a bucket-list lookup (hash_str() and
 * list_for_each_entry() over _uuid_buckets) and a walk of uuid_rb_tree. They
 * look like the removed and added sides of a diff whose +/- markers were
 * lost, so the text does not compile as shown -- confirm against the tree.
 */
static struct hash_cell *__get_uuid_cell(const char *str)
{
	struct hash_cell *hc;
	unsigned int h = hash_str(str);
	struct rb_node *n = uuid_rb_tree.rb_node;

	list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
		if (!strcmp(hc->uuid, str)) {
	while (n) {
		struct hash_cell *hc = container_of(n, struct hash_cell, uuid_node);
		int c = strcmp(hc->uuid, str);
		if (!c) {
			dm_get(hc->md);
			return hc;
		}
		/* hc->uuid compares greater than str: go left, otherwise right. */
		n = c >= 0 ? n->rb_left : n->rb_right;
	}

	return NULL;
}

/*
 * Remove @hc from the name tree if its name_set flag says it is linked.
 * The flag is cleared before the node is erased; a cell that is not linked
 * is left untouched.
 */
static void __unlink_name(struct hash_cell *hc)
{
	if (!hc->name_set)
		return;

	hc->name_set = false;
	rb_erase(&hc->name_node, &name_rb_tree);
}

/*
 * Remove @hc from the uuid tree if its uuid_set flag says it is linked.
 * The flag is cleared before the node is erased; a cell that is not linked
 * is left untouched.
 */
static void __unlink_uuid(struct hash_cell *hc)
{
	if (!hc->uuid_set)
		return;

	hc->uuid_set = false;
	rb_erase(&hc->uuid_node, &uuid_rb_tree);
}

static void __link_name(struct hash_cell *new_hc)
{
	struct rb_node **n, *parent;

	__unlink_name(new_hc);

	new_hc->name_set = true;

	n = &name_rb_tree.rb_node;
	parent = NULL;

	while (*n) {
		struct hash_cell *hc = container_of(*n, struct hash_cell, name_node);
		int c = strcmp(hc->name, new_hc->name);
		BUG_ON(!c);
		parent = *n;
		n = c >= 0 ? &hc->name_node.rb_left : &hc->name_node.rb_right;
	}

	rb_link_node(&new_hc->name_node, parent, n);
	rb_insert_color(&new_hc->name_node, &name_rb_tree);
}

static void __link_uuid(struct hash_cell *new_hc)
{
	struct rb_node **n, *parent;

	__unlink_uuid(new_hc);

	new_hc->uuid_set = true;

	n = &uuid_rb_tree.rb_node;
	parent = NULL;

	while (*n) {
		struct hash_cell *hc = container_of(*n, struct hash_cell, uuid_node);
		int c = strcmp(hc->uuid, new_hc->uuid);
		BUG_ON(!c);
		parent = *n;
		n = c > 0 ? &hc->uuid_node.rb_left : &hc->uuid_node.rb_right;
	}

	rb_link_node(&new_hc->uuid_node, parent, n);
	rb_insert_color(&new_hc->uuid_node, &uuid_rb_tree);
}

static struct hash_cell *__get_dev_cell(uint64_t dev)
{
	struct mapped_device *md;
@@ -185,8 +223,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid,
		}
	}

	INIT_LIST_HEAD(&hc->name_list);
	INIT_LIST_HEAD(&hc->uuid_list);
	hc->name_set = hc->uuid_set = false;
	hc->md = md;
	hc->new_map = NULL;
	return hc;
@@ -226,16 +263,16 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
		goto bad;
	}

	list_add(&cell->name_list, _name_buckets + hash_str(name));
	__link_name(cell);

	if (uuid) {
		hc = __get_uuid_cell(uuid);
		if (hc) {
			list_del(&cell->name_list);
			__unlink_name(cell);
			dm_put(hc->md);
			goto bad;
		}
		list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
		__link_uuid(cell);
	}
	dm_get(md);
	mutex_lock(&dm_hash_cells_mutex);
@@ -256,9 +293,9 @@ static struct dm_table *__hash_remove(struct hash_cell *hc)
	struct dm_table *table;
	int srcu_idx;

	/* remove from the dev hash */
	list_del(&hc->uuid_list);
	list_del(&hc->name_list);
	/* remove from the dev trees */
	__unlink_name(hc);
	__unlink_uuid(hc);
	mutex_lock(&dm_hash_cells_mutex);
	dm_set_mdptr(hc->md, NULL);
	mutex_unlock(&dm_hash_cells_mutex);
@@ -279,7 +316,8 @@ static struct dm_table *__hash_remove(struct hash_cell *hc)

static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool only_deferred)
{
	int i, dev_skipped;
	int dev_skipped;
	struct rb_node *n;
	struct hash_cell *hc;
	struct mapped_device *md;
	struct dm_table *t;
@@ -289,8 +327,8 @@ static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool

	down_write(&_hash_lock);

	for (i = 0; i < NUM_BUCKETS; i++) {
		list_for_each_entry(hc, _name_buckets + i, name_list) {
	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
		hc = container_of(n, struct hash_cell, name_node);
		md = hc->md;
		dm_get(md);

@@ -323,7 +361,6 @@ static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool
		 */
		goto retry;
	}
	}

	up_write(&_hash_lock);

@@ -340,7 +377,7 @@ static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid)
	hc->uuid = new_uuid;
	mutex_unlock(&dm_hash_cells_mutex);

	list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid));
	__link_uuid(hc);
}

/*
@@ -354,14 +391,14 @@ static char *__change_cell_name(struct hash_cell *hc, char *new_name)
	/*
	 * Rename and move the name cell.
	 */
	list_del(&hc->name_list);
	__unlink_name(hc);
	old_name = hc->name;

	mutex_lock(&dm_hash_cells_mutex);
	hc->name = new_name;
	mutex_unlock(&dm_hash_cells_mutex);

	list_add(&hc->name_list, _name_buckets + hash_str(new_name));
	__link_name(hc);

	return old_name;
}
@@ -505,7 +542,7 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,

static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
{
	unsigned int i;
	struct rb_node *n;
	struct hash_cell *hc;
	size_t len, needed = 0;
	struct gendisk *disk;
@@ -518,12 +555,11 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
	 * Loop through all the devices working out how much
	 * space we need.
	 */
	for (i = 0; i < NUM_BUCKETS; i++) {
		list_for_each_entry (hc, _name_buckets + i, name_list) {
	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
		hc = container_of(n, struct hash_cell, name_node);
		needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
		needed += align_val(sizeof(uint32_t));
	}
	}

	/*
	 * Grab our output buffer.
@@ -540,8 +576,8 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
	/*
	 * Now loop through filling out the names.
	 */
	for (i = 0; i < NUM_BUCKETS; i++) {
		list_for_each_entry (hc, _name_buckets + i, name_list) {
	for (n = rb_first(&name_rb_tree); n; n = rb_next(n)) {
		hc = container_of(n, struct hash_cell, name_node);
		if (old_nl)
			old_nl->next = (uint32_t) ((void *) nl -
						   (void *) old_nl);
@@ -555,7 +591,6 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
		*event_nr = dm_get_event_nr(hc->md);
		nl = align_ptr(event_nr + 1);
	}
	}
	/*
	 * If mismatch happens, security may be compromised due to buffer
	 * overflow, so it's better to crash.
@@ -1991,14 +2026,9 @@ int __init dm_interface_init(void)
{
	int r;

	r = dm_hash_init();
	if (r)
		return r;

	r = misc_register(&_dm_misc);
	if (r) {
		DMERR("misc_register failed for control device");
		dm_hash_exit();
		return r;
	}