[sheepdog] [PATCH v2] introduce wrapper functions for pthread_rwlock

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Fri Aug 9 06:03:59 CEST 2013


Currently, we don't check the return values of pthread_rwlock_rdlock()
and pthread_rwlock_wrlock().  When they return an error, we may call
pthread_rwlock_unlock() even though another thread holds the lock.
This can lead to bugs that are difficult to find.

These functions can actually return errors.  For example, when
pthread_rwlock_rdlock() is called while the calling thread already owns
the write lock, it returns EDEADLK without acquiring the lock.  It is
difficult to guarantee that we never run into this kind of deadlock, so
checking the return values strictly is important to ensure our code is
correct.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
v2: Rename the structure and the function names:

    sd_mutex_t         -> struct sd_lock

    sd_mutex_init()    -> sd_init_lock()
    sd_mutex_unlock()  -> sd_unlock()
    sd_mutex_rdlock()  -> sd_read_lock()
    sd_mutex_wrlock()  -> sd_write_lock()
    sd_mutex_destroy() -> sd_destroy_lock()
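
    A call site after this change looks like the following sketch
    (the lock name is illustrative; the API is from util.h below):

        static struct sd_lock example_lock = SD_LOCK_INITIALIZER;

        static void reader(void)
        {
                sd_read_lock(&example_lock);
                /* read-side critical section; a failed lock or
                 * unlock now panics instead of being silently
                 * ignored */
                sd_unlock(&example_lock);
        }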


 collie/farm/farm.c        |  6 ++---
 include/util.h            | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/sockfd_cache.c        | 36 ++++++++++++++---------------
 sheep/cluster/zookeeper.c | 32 +++++++++++++-------------
 sheep/md.c                | 42 +++++++++++++++++-----------------
 sheep/object_cache.c      | 44 +++++++++++++++++------------------
 sheep/object_list_cache.c | 26 ++++++++++-----------
 sheep/vdi.c               | 22 +++++++++---------
 sheepfs/volume.c          | 30 ++++++++++++------------
 9 files changed, 177 insertions(+), 119 deletions(-)

diff --git a/collie/farm/farm.c b/collie/farm/farm.c
index 4b43f3d..cdf8e9b 100644
--- a/collie/farm/farm.c
+++ b/collie/farm/farm.c
@@ -21,7 +21,7 @@
 static char farm_object_dir[PATH_MAX];
 static char farm_dir[PATH_MAX];
 
-static pthread_rwlock_t vdi_list_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct sd_lock vdi_list_lock = SD_LOCK_INITIALIZER;
 struct vdi_entry {
 	char name[SD_MAX_VDI_LEN];
 	uint64_t vdi_size;
@@ -350,9 +350,9 @@ static void do_load_object(struct work *work)
 				   sw->entry.nr_copies) < 0)
 			goto error;
 
-		pthread_rwlock_wrlock(&vdi_list_lock);
+		sd_write_lock(&vdi_list_lock);
 		insert_vdi(buffer);
-		pthread_rwlock_unlock(&vdi_list_lock);
+		sd_unlock(&vdi_list_lock);
 	}
 
 	farm_show_progress(uatomic_add_return(&loaded, 1), trunk_get_count());
diff --git a/include/util.h b/include/util.h
index 0d50f4f..91e94be 100644
--- a/include/util.h
+++ b/include/util.h
@@ -11,6 +11,8 @@
 #include <search.h>
 #include <urcu/uatomic.h>
 #include <sys/eventfd.h>
+#include <pthread.h>
+#include <errno.h>
 
 #include "logger.h"
 #include "bitops.h"
@@ -257,6 +259,62 @@ static inline int refcount_dec(refcnt_t *rc)
 	return uatomic_sub_return(&rc->val, 1);
 }
 
+/* wrapper for pthread_rwlock */
+
+#define SD_LOCK_INITIALIZER { .rwlock = PTHREAD_RWLOCK_INITIALIZER }
+
+struct sd_lock {
+	pthread_rwlock_t rwlock;
+};
+
+static inline void sd_init_lock(struct sd_lock *lock)
+{
+	int ret;
+
+	do {
+		ret = pthread_rwlock_init(&lock->rwlock, NULL);
+	} while (ret == EAGAIN);
+
+	if (ret != 0)
+		panic("failed to initialize a lock, %s", strerror(ret));
+}
+
+static inline void sd_destroy_lock(struct sd_lock *lock)
+{
+	int ret = pthread_rwlock_destroy(&lock->rwlock);
+
+	if (ret != 0)
+		panic("failed to destroy a lock, %s", strerror(ret));
+}
+
+static inline void sd_read_lock(struct sd_lock *lock)
+{
+	int ret;
+
+	do {
+		ret = pthread_rwlock_rdlock(&lock->rwlock);
+	} while (ret == EAGAIN);
+
+	if (ret != 0)
+		panic("failed to lock for reading, %s", strerror(ret));
+}
+
+static inline void sd_write_lock(struct sd_lock *lock)
+{
+	int ret = pthread_rwlock_wrlock(&lock->rwlock);
+
+	if (ret != 0)
+		panic("failed to lock for writing, %s", strerror(ret));
+}
+
+static inline void sd_unlock(struct sd_lock *lock)
+{
+	int ret = pthread_rwlock_unlock(&lock->rwlock);
+
+	if (ret != 0)
+		panic("failed to unlock, %s", strerror(ret));
+}
+
 /* colors */
 #define TEXT_NORMAL         "\033[0m"
 #define TEXT_BOLD           "\033[1m"
diff --git a/lib/sockfd_cache.c b/lib/sockfd_cache.c
index f4efd1b..b1973ba 100644
--- a/lib/sockfd_cache.c
+++ b/lib/sockfd_cache.c
@@ -37,13 +37,13 @@
 
 struct sockfd_cache {
 	struct rb_root root;
-	pthread_rwlock_t lock;
+	struct sd_lock lock;
 	int count;
 };
 
 static struct sockfd_cache sockfd_cache = {
 	.root = RB_ROOT,
-	.lock = PTHREAD_RWLOCK_INITIALIZER,
+	.lock = SD_LOCK_INITIALIZER,
 };
 
 /*
@@ -144,7 +144,7 @@ static struct sockfd_cache_entry *sockfd_cache_grab(const struct node_id *nid,
 {
 	struct sockfd_cache_entry *entry;
 
-	pthread_rwlock_rdlock(&sockfd_cache.lock);
+	sd_read_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (!entry) {
 		char name[INET6_ADDRSTRLEN];
@@ -158,7 +158,7 @@ static struct sockfd_cache_entry *sockfd_cache_grab(const struct node_id *nid,
 	if (*ret_idx == -1)
 		entry = NULL;
 out:
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 	return entry;
 }
 
@@ -196,7 +196,7 @@ static bool sockfd_cache_destroy(const struct node_id *nid)
 {
 	struct sockfd_cache_entry *entry;
 
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (!entry) {
 		sd_dprintf("It is already destroyed");
@@ -209,14 +209,14 @@ static bool sockfd_cache_destroy(const struct node_id *nid)
 	}
 
 	rb_erase(&entry->rb, &sockfd_cache.root);
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 
 	destroy_all_slots(entry);
 	free_cache_entry(entry);
 
 	return true;
 false_out:
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 	return false;
 }
 
@@ -243,12 +243,12 @@ void sockfd_cache_add_group(const struct sd_node *nodes, int nr)
 	const struct sd_node *p;
 
 	sd_dprintf("%d", nr);
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	while (nr--) {
 		p = nodes + nr;
 		sockfd_cache_add_nolock(&p->nid);
 	}
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 }
 
 /* Add one node to the cache means we can do caching tricks on this node */
@@ -258,7 +258,7 @@ void sockfd_cache_add(const struct node_id *nid)
 	char name[INET6_ADDRSTRLEN];
 	int n, i;
 
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	new = xmalloc(sizeof(*new));
 	new->fds = xzalloc(sizeof(struct sockfd_cache_fd) * fds_count);
 	for (i = 0; i < fds_count; i++)
@@ -267,10 +267,10 @@ void sockfd_cache_add(const struct node_id *nid)
 	memcpy(&new->nid, nid, sizeof(struct node_id));
 	if (sockfd_cache_insert(new)) {
 		free_cache_entry(new);
-		pthread_rwlock_unlock(&sockfd_cache.lock);
+		sd_unlock(&sockfd_cache.lock);
 		return;
 	}
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 	n = uatomic_add_return(&sockfd_cache.count, 1);
 	addr_to_str(name, sizeof(name), nid->addr, 0);
 	sd_dprintf("%s:%d, count %d", name, nid->port, n);
@@ -288,7 +288,7 @@ static void do_grow_fds(struct work *work)
 	int old_fds_count, new_fds_count, new_size, i;
 
 	sd_dprintf("%d", fds_count);
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	old_fds_count = fds_count;
 	new_fds_count = fds_count * 2;
 	new_size = sizeof(struct sockfd_cache_fd) * fds_count * 2;
@@ -303,7 +303,7 @@ static void do_grow_fds(struct work *work)
 
 	fds_count *= 2;
 	fds_high_watermark = FDS_WATERMARK(fds_count);
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 }
 
 static void grow_fds_done(struct work *work)
@@ -414,11 +414,11 @@ static void sockfd_cache_put_long(const struct node_id *nid, int idx)
 	addr_to_str(name, sizeof(name), addr, 0);
 	sd_dprintf("%s:%d idx %d", name, port, idx);
 
-	pthread_rwlock_rdlock(&sockfd_cache.lock);
+	sd_read_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (entry)
 		uatomic_set_false(&entry->fds[idx].in_use);
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 }
 
 static void sockfd_cache_close(const struct node_id *nid, int idx)
@@ -432,14 +432,14 @@ static void sockfd_cache_close(const struct node_id *nid, int idx)
 	addr_to_str(name, sizeof(name), addr, 0);
 	sd_dprintf("%s:%d idx %d", name, port, idx);
 
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (entry) {
 		close(entry->fds[idx].fd);
 		entry->fds[idx].fd = -1;
 		uatomic_set_false(&entry->fds[idx].in_use);
 	}
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_unlock(&sockfd_cache.lock);
 }
 
 /*
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 6e632fc..d28b287 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -71,8 +71,8 @@ struct zk_event {
 static struct sd_node sd_nodes[SD_MAX_NODES];
 static size_t nr_sd_nodes;
 static struct rb_root zk_node_root = RB_ROOT;
-static pthread_rwlock_t zk_tree_lock = PTHREAD_RWLOCK_INITIALIZER;
-static pthread_rwlock_t zk_compete_master_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct sd_lock zk_tree_lock = SD_LOCK_INITIALIZER;
+static struct sd_lock zk_compete_master_lock = SD_LOCK_INITIALIZER;
 static LIST_HEAD(zk_block_list);
 static uatomic_bool is_master;
 static uatomic_bool stop;
@@ -132,9 +132,9 @@ static inline struct zk_node *zk_tree_search(const struct node_id *nid)
 {
 	struct zk_node *n;
 
-	pthread_rwlock_rdlock(&zk_tree_lock);
+	sd_read_lock(&zk_tree_lock);
 	n = zk_tree_search_nolock(nid);
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_unlock(&zk_tree_lock);
 	return n;
 }
 
@@ -439,7 +439,7 @@ static inline void zk_tree_add(struct zk_node *node)
 {
 	struct zk_node *zk = xzalloc(sizeof(*zk));
 	*zk = *node;
-	pthread_rwlock_wrlock(&zk_tree_lock);
+	sd_write_lock(&zk_tree_lock);
 	if (zk_tree_insert(zk)) {
 		free(zk);
 		goto out;
@@ -450,7 +450,7 @@ static inline void zk_tree_add(struct zk_node *node)
 	 */
 	sd_nodes[nr_sd_nodes++] = zk->node;
 out:
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_unlock(&zk_tree_lock);
 }
 
 static inline void zk_tree_del_nolock(struct zk_node *node)
@@ -461,9 +461,9 @@ static inline void zk_tree_del_nolock(struct zk_node *node)
 
 static inline void zk_tree_del(struct zk_node *node)
 {
-	pthread_rwlock_wrlock(&zk_tree_lock);
+	sd_write_lock(&zk_tree_lock);
 	zk_tree_del_nolock(node);
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_unlock(&zk_tree_lock);
 }
 
 static inline void zk_tree_destroy(void)
@@ -471,13 +471,13 @@ static inline void zk_tree_destroy(void)
 	struct zk_node *zk;
 	int i;
 
-	pthread_rwlock_wrlock(&zk_tree_lock);
+	sd_write_lock(&zk_tree_lock);
 	for (i = 0; i < nr_sd_nodes; i++) {
 		zk = zk_tree_search_nolock(&sd_nodes[i].nid);
 		if (zk)
 			zk_tree_del_nolock(zk);
 	}
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_unlock(&zk_tree_lock);
 }
 
 static inline void build_node_list(void)
@@ -575,11 +575,11 @@ static void zk_watcher(zhandle_t *zh, int type, int state, const char *path,
 		p++;
 		str_to_node(p, &znode.node);
 		/* FIXME: remove redundant leave events */
-		pthread_rwlock_rdlock(&zk_tree_lock);
+		sd_read_lock(&zk_tree_lock);
 		n = zk_tree_search_nolock(&znode.node.nid);
 		if (n)
 			n->gone = true;
-		pthread_rwlock_unlock(&zk_tree_lock);
+		sd_unlock(&zk_tree_lock);
 		if (n)
 			add_event(EVENT_LEAVE, &znode, NULL, 0);
 	}
@@ -730,7 +730,7 @@ static void zk_compete_master(void)
 	 * This is to protect master_seq and my_seq because this function will
 	 * be called by both main thread and zookeeper's event thread.
 	 */
-	pthread_rwlock_wrlock(&zk_compete_master_lock);
+	sd_write_lock(&zk_compete_master_lock);
 
 	if (uatomic_is_true(&is_master) || uatomic_is_true(&stop))
 		goto out_unlock;
@@ -782,7 +782,7 @@ success:
 	uatomic_set_true(&is_master);
 	sd_dprintf("success");
 out_unlock:
-	pthread_rwlock_unlock(&zk_compete_master_lock);
+	sd_unlock(&zk_compete_master_lock);
 }
 
 static int zk_join(const struct sd_node *myself,
@@ -991,12 +991,12 @@ static void zk_handle_update_node(struct zk_event *ev)
 	if (node_eq(snode, &this_node.node))
 		this_node.node = *snode;
 
-	pthread_rwlock_rdlock(&zk_tree_lock);
+	sd_read_lock(&zk_tree_lock);
 	t = zk_tree_search_nolock(&snode->nid);
 	assert(t);
 	t->node = *snode;
 	build_node_list();
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_unlock(&zk_tree_lock);
 	sd_update_node_handler(snode);
 }
 
diff --git a/sheep/md.c b/sheep/md.c
index 6934850..181ecbd 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -30,7 +30,7 @@ struct vdisk {
 static struct disk md_disks[MD_MAX_DISK];
 static struct vdisk md_vds[MD_MAX_VDISK];
 
-static pthread_rwlock_t md_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct sd_lock md_lock = SD_LOCK_INITIALIZER;
 static int md_nr_disks; /* Protected by md_lock */
 static int md_nr_vds;
 
@@ -38,9 +38,9 @@ static inline int nr_online_disks(void)
 {
 	int nr;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	nr = md_nr_disks;
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 
 	return nr;
 }
@@ -337,10 +337,10 @@ char *md_get_object_path(uint64_t oid)
 	struct vdisk *vd;
 	char *p;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	vd = oid_to_vdisk(oid);
 	p = md_disks[vd->idx].path;
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 	sd_dprintf("%d, %s", vd->idx, p);
 
 	return p;
@@ -360,14 +360,14 @@ int for_each_object_in_wd(int (*func)(uint64_t oid, char *path, uint32_t epoch,
 {
 	int i, ret = SD_RES_SUCCESS;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		ret = for_each_object_in_path(md_disks[i].path, func,
 					      cleanup, arg);
 		if (ret != SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 	return ret;
 }
 
@@ -378,7 +378,7 @@ int for_each_object_in_stale(int (*func)(uint64_t oid, char *path,
 	int i, ret = SD_RES_SUCCESS;
 	char path[PATH_MAX];
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		snprintf(path, sizeof(path), "%s/.stale", md_disks[i].path);
 		sd_eprintf("%s", path);
@@ -386,7 +386,7 @@ int for_each_object_in_stale(int (*func)(uint64_t oid, char *path,
 		if (ret != SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 	return ret;
 }
 
@@ -395,13 +395,13 @@ int for_each_obj_path(int (*func)(char *path))
 {
 	int i, ret = SD_RES_SUCCESS;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		ret = func(md_disks[i].path);
 		if (ret != SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 	return ret;
 }
 
@@ -423,7 +423,7 @@ static void md_do_recover(struct work *work)
 	struct md_work *mw = container_of(work, struct md_work, work);
 	int idx, nr = 0;
 
-	pthread_rwlock_wrlock(&md_lock);
+	sd_write_lock(&md_lock);
 	idx = path_to_disk_idx(mw->path);
 	if (idx < 0)
 		/* Just ignore the duplicate EIO of the same path */
@@ -432,7 +432,7 @@ static void md_do_recover(struct work *work)
 	md_init_space();
 	nr = md_nr_disks;
 out:
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 
 	if (nr > 0)
 		kick_recover();
@@ -549,13 +549,13 @@ static int scan_wd(uint64_t oid, uint32_t epoch)
 {
 	int i, ret = SD_RES_EIO;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		ret = md_check_and_move(oid, epoch, md_disks[i].path);
 		if (ret == SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 	return ret;
 }
 
@@ -598,7 +598,7 @@ uint32_t md_get_info(struct sd_md_info *info)
 	int i;
 
 	memset(info, 0, ret);
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		info->disk[i].idx = i;
 		pstrcpy(info->disk[i].path, PATH_MAX, md_disks[i].path);
@@ -607,7 +607,7 @@ uint32_t md_get_info(struct sd_md_info *info)
 							&info->disk[i].used);
 	}
 	info->nr = md_nr_disks;
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 	return ret;
 }
 
@@ -627,7 +627,7 @@ static int do_plug_unplug(char *disks, bool plug)
 	char *path;
 	int old_nr, cur_nr = 0, ret = SD_RES_UNKNOWN;
 
-	pthread_rwlock_wrlock(&md_lock);
+	sd_write_lock(&md_lock);
 	old_nr = md_nr_disks;
 	path = strtok(disks, ",");
 	do {
@@ -648,7 +648,7 @@ static int do_plug_unplug(char *disks, bool plug)
 
 	ret = SD_RES_SUCCESS;
 out:
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 
 	/*
 	 * We have to kick recover aggressively because there is possibility
@@ -676,10 +676,10 @@ uint64_t md_get_size(uint64_t *used)
 	uint64_t fsize = 0;
 	*used = 0;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_read_lock(&md_lock);
 	for (int i = 0; i < md_nr_disks; i++)
 		fsize += get_path_free_size(md_disks[i].path, used);
-	pthread_rwlock_unlock(&md_lock);
+	sd_unlock(&md_lock);
 
 	return fsize + *used;
 }
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 8c602a1..7bfe6d4 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -47,7 +47,7 @@ struct object_cache_entry {
 	struct list_head dirty_list; /* For dirty list of object cache */
 	struct list_head lru_list; /* For lru list of object cache */
 
-	pthread_rwlock_t lock; /* Entry lock */
+	struct sd_lock lock; /* Entry lock */
 };
 
 struct object_cache {
@@ -61,7 +61,7 @@ struct object_cache {
 	int push_efd; /* Used to synchronize between pusher and push threads */
 	uatomic_bool in_push; /* Whether if pusher is running */
 
-	pthread_rwlock_t lock; /* Cache lock */
+	struct sd_lock lock; /* Cache lock */
 };
 
 struct push_work {
@@ -77,8 +77,8 @@ static int def_open_flags = O_RDWR;
 #define HASH_BITS	5
 #define HASH_SIZE	(1 << HASH_BITS)
 
-static pthread_rwlock_t hashtable_lock[HASH_SIZE] = {
-	[0 ... HASH_SIZE - 1] = PTHREAD_RWLOCK_INITIALIZER
+static struct sd_lock hashtable_lock[HASH_SIZE] = {
+	[0 ... HASH_SIZE - 1] = SD_LOCK_INITIALIZER
 };
 
 static struct hlist_head cache_hashtable[HASH_SIZE];
@@ -167,32 +167,32 @@ static inline bool entry_in_use(struct object_cache_entry *entry)
  */
 static inline void read_lock_cache(struct object_cache *oc)
 {
-	pthread_rwlock_rdlock(&oc->lock);
+	sd_read_lock(&oc->lock);
 }
 
 static inline void write_lock_cache(struct object_cache *oc)
 {
-	pthread_rwlock_wrlock(&oc->lock);
+	sd_write_lock(&oc->lock);
 }
 
 static inline void unlock_cache(struct object_cache *oc)
 {
-	pthread_rwlock_unlock(&oc->lock);
+	sd_unlock(&oc->lock);
 }
 
 static inline void read_lock_entry(struct object_cache_entry *entry)
 {
-	pthread_rwlock_rdlock(&entry->lock);
+	sd_read_lock(&entry->lock);
 }
 
 static inline void write_lock_entry(struct object_cache_entry *entry)
 {
-	pthread_rwlock_wrlock(&entry->lock);
+	sd_write_lock(&entry->lock);
 }
 
 static inline void unlock_entry(struct object_cache_entry *entry)
 {
-	pthread_rwlock_unlock(&entry->lock);
+	sd_unlock(&entry->lock);
 }
 
 static struct object_cache_entry *
@@ -300,7 +300,7 @@ free_cache_entry(struct object_cache_entry *entry)
 	list_del_init(&entry->lru_list);
 	if (!list_empty(&entry->dirty_list))
 		del_from_dirty_list(entry);
-	pthread_rwlock_destroy(&entry->lock);
+	sd_destroy_lock(&entry->lock);
 	free(entry);
 }
 
@@ -589,19 +589,19 @@ static void do_reclaim(struct work *work)
 		int idx = (i + j) % HASH_SIZE;
 		struct hlist_head *head = cache_hashtable + idx;
 
-		pthread_rwlock_rdlock(&hashtable_lock[idx]);
+		sd_read_lock(&hashtable_lock[idx]);
 		hlist_for_each_entry(cache, node, head, hash) {
 			uint32_t cap;
 
 			do_reclaim_object(cache);
 			cap = uatomic_read(&gcache.capacity);
 			if (cap <= HIGH_WATERMARK) {
-				pthread_rwlock_unlock(&hashtable_lock[idx]);
+				sd_unlock(&hashtable_lock[idx]);
 				sd_dprintf("complete, capacity %"PRIu32, cap);
 				return;
 			}
 		}
-		pthread_rwlock_unlock(&hashtable_lock[idx]);
+		sd_unlock(&hashtable_lock[idx]);
 	}
 	sd_dprintf("finished");
 }
@@ -634,9 +634,9 @@ static struct object_cache *find_object_cache(uint32_t vid, bool create)
 	struct hlist_node *node;
 
 	if (create)
-		pthread_rwlock_wrlock(&hashtable_lock[h]);
+		sd_write_lock(&hashtable_lock[h]);
 	else
-		pthread_rwlock_rdlock(&hashtable_lock[h]);
+		sd_read_lock(&hashtable_lock[h]);
 
 	if (hlist_empty(head))
 		goto not_found;
@@ -656,13 +656,13 @@ not_found:
 		INIT_LIST_HEAD(&cache->dirty_head);
 		INIT_LIST_HEAD(&cache->lru_head);
 
-		pthread_rwlock_init(&cache->lock, NULL);
+		sd_init_lock(&cache->lock);
 		hlist_add_head(&cache->hash, head);
 	} else {
 		cache = NULL;
 	}
 out:
-	pthread_rwlock_unlock(&hashtable_lock[h]);
+	sd_unlock(&hashtable_lock[h]);
 	return cache;
 }
 
@@ -695,7 +695,7 @@ alloc_cache_entry(struct object_cache *oc, uint32_t idx)
 	entry = xzalloc(sizeof(*entry));
 	entry->oc = oc;
 	entry->idx = idx;
-	pthread_rwlock_init(&entry->lock, NULL);
+	sd_init_lock(&entry->lock);
 	INIT_LIST_HEAD(&entry->dirty_list);
 	INIT_LIST_HEAD(&entry->lru_list);
 
@@ -977,9 +977,9 @@ void object_cache_delete(uint32_t vid)
 		return;
 
 	/* Firstly we free memeory */
-	pthread_rwlock_wrlock(&hashtable_lock[h]);
+	sd_write_lock(&hashtable_lock[h]);
 	hlist_del(&cache->hash);
-	pthread_rwlock_unlock(&hashtable_lock[h]);
+	sd_unlock(&hashtable_lock[h]);
 
 	write_lock_cache(cache);
 	list_for_each_entry_safe(entry, t, &cache->lru_head, lru_list) {
@@ -987,7 +987,7 @@ void object_cache_delete(uint32_t vid)
 		uatomic_sub(&gcache.capacity, CACHE_OBJECT_SIZE);
 	}
 	unlock_cache(cache);
-	pthread_rwlock_destroy(&cache->lock);
+	sd_destroy_lock(&cache->lock);
 	close(cache->push_efd);
 	free(cache);
 
diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
index 6d5139f..fc912ca 100644
--- a/sheep/object_list_cache.c
+++ b/sheep/object_list_cache.c
@@ -26,7 +26,7 @@ struct objlist_cache {
 	uint64_t *buf;
 	struct list_head entry_list;
 	struct rb_root root;
-	pthread_rwlock_t lock;
+	struct sd_lock lock;
 };
 
 struct objlist_deletion_work {
@@ -38,7 +38,7 @@ static struct objlist_cache obj_list_cache = {
 	.tree_version	= 1,
 	.root		= RB_ROOT,
 	.entry_list     = LIST_HEAD_INIT(obj_list_cache.entry_list),
-	.lock		= PTHREAD_RWLOCK_INITIALIZER,
+	.lock		= SD_LOCK_INITIALIZER,
 };
 
 static struct objlist_cache_entry *objlist_cache_rb_insert(struct rb_root *root,
@@ -92,12 +92,12 @@ static int objlist_cache_rb_remove(struct rb_root *root, uint64_t oid)
 
 void objlist_cache_remove(uint64_t oid)
 {
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_write_lock(&obj_list_cache.lock);
 	if (!objlist_cache_rb_remove(&obj_list_cache.root, oid)) {
 		obj_list_cache.cache_size--;
 		obj_list_cache.tree_version++;
 	}
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_unlock(&obj_list_cache.lock);
 }
 
 int objlist_cache_insert(uint64_t oid)
@@ -108,7 +108,7 @@ int objlist_cache_insert(uint64_t oid)
 	entry->oid = oid;
 	rb_init_node(&entry->node);
 
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_write_lock(&obj_list_cache.lock);
 	p = objlist_cache_rb_insert(&obj_list_cache.root, entry);
 	if (p)
 		free(entry);
@@ -117,7 +117,7 @@ int objlist_cache_insert(uint64_t oid)
 		obj_list_cache.cache_size++;
 		obj_list_cache.tree_version++;
 	}
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_unlock(&obj_list_cache.lock);
 
 	return 0;
 }
@@ -128,13 +128,13 @@ int get_obj_list(const struct sd_req *hdr, struct sd_rsp *rsp, void *data)
 	struct objlist_cache_entry *entry;
 
 	/* first try getting the cached buffer with only a read lock held */
-	pthread_rwlock_rdlock(&obj_list_cache.lock);
+	sd_read_lock(&obj_list_cache.lock);
 	if (obj_list_cache.tree_version == obj_list_cache.buf_version)
 		goto out;
 
 	/* if that fails grab a write lock for the usually nessecary update */
-	pthread_rwlock_unlock(&obj_list_cache.lock);
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_unlock(&obj_list_cache.lock);
+	sd_write_lock(&obj_list_cache.lock);
 	if (obj_list_cache.tree_version == obj_list_cache.buf_version)
 		goto out;
 
@@ -148,14 +148,14 @@ int get_obj_list(const struct sd_req *hdr, struct sd_rsp *rsp, void *data)
 
 out:
 	if (hdr->data_length < obj_list_cache.cache_size * sizeof(uint64_t)) {
-		pthread_rwlock_unlock(&obj_list_cache.lock);
+		sd_unlock(&obj_list_cache.lock);
 		sd_eprintf("GET_OBJ_LIST buffer too small");
 		return SD_RES_BUFFER_SMALL;
 	}
 
 	rsp->data_length = obj_list_cache.cache_size * sizeof(uint64_t);
 	memcpy(data, obj_list_cache.buf, rsp->data_length);
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_unlock(&obj_list_cache.lock);
 	return SD_RES_SUCCESS;
 }
 
@@ -179,7 +179,7 @@ static void objlist_deletion_work(struct work *work)
 		return;
 	}
 
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_write_lock(&obj_list_cache.lock);
 	list_for_each_entry_safe(entry, t, &obj_list_cache.entry_list, list) {
 		entry_vid = oid_to_vid(entry->oid);
 		if (entry_vid != vid)
@@ -189,7 +189,7 @@ static void objlist_deletion_work(struct work *work)
 		rb_erase(&entry->node, &obj_list_cache.root);
 		free(entry);
 	}
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_unlock(&obj_list_cache.lock);
 }
 
 static void objlist_deletion_done(struct work *work)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 6839bba..24c0ef0 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -20,7 +20,7 @@ struct vdi_state_entry {
 
 static uint32_t max_copies;
 static struct rb_root vdi_state_root = RB_ROOT;
-static pthread_rwlock_t vdi_state_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct sd_lock vdi_state_lock = SD_LOCK_INITIALIZER;
 
 static struct vdi_state_entry *vdi_state_search(struct rb_root *root,
 						uint32_t vid)
@@ -70,9 +70,9 @@ static bool vid_is_snapshot(uint32_t vid)
 {
 	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_state_lock);
+	sd_read_lock(&vdi_state_lock);
 	entry = vdi_state_search(&vdi_state_root, vid);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_unlock(&vdi_state_lock);
 
 	if (!entry) {
 		sd_eprintf("No VDI entry for %" PRIx32 " found", vid);
@@ -95,9 +95,9 @@ int get_vdi_copy_number(uint32_t vid)
 {
 	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_state_lock);
+	sd_read_lock(&vdi_state_lock);
 	entry = vdi_state_search(&vdi_state_root, vid);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_unlock(&vdi_state_lock);
 
 	if (!entry) {
 		sd_eprintf("No VDI copy entry for %" PRIx32 " found", vid);
@@ -145,7 +145,7 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot)
 
 	sd_dprintf("%" PRIx32 ", %d", vid, nr_copies);
 
-	pthread_rwlock_wrlock(&vdi_state_lock);
+	sd_write_lock(&vdi_state_lock);
 	old = vdi_state_insert(&vdi_state_root, entry);
 	if (old) {
 		free(entry);
@@ -157,7 +157,7 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot)
 	if (uatomic_read(&max_copies) == 0 ||
 	    nr_copies > uatomic_read(&max_copies))
 		uatomic_set(&max_copies, nr_copies);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_unlock(&vdi_state_lock);
 
 	return SD_RES_SUCCESS;
 }
@@ -169,7 +169,7 @@ int fill_vdi_state_list(void *data)
 	struct vdi_state *vs = data;
 	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_state_lock);
+	sd_read_lock(&vdi_state_lock);
 	for (n = rb_first(&vdi_state_root); n; n = rb_next(n)) {
 		entry = rb_entry(n, struct vdi_state_entry, node);
 		memset(vs, 0, sizeof(*vs));
@@ -179,7 +179,7 @@ int fill_vdi_state_list(void *data)
 		vs++;
 		nr++;
 	}
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_unlock(&vdi_state_lock);
 
 	return nr * sizeof(*vs);
 }
@@ -953,7 +953,7 @@ void clean_vdi_state(void)
 	struct rb_node *current_node = rb_first(&vdi_state_root);
 	struct vdi_state_entry *entry = NULL;
 
-	pthread_rwlock_wrlock(&vdi_state_lock);
+	sd_write_lock(&vdi_state_lock);
 	while (current_node) {
 		entry = rb_entry(current_node, struct vdi_state_entry, node);
 		rb_erase(current_node, &vdi_state_root);
@@ -962,5 +962,5 @@ void clean_vdi_state(void)
 		current_node = rb_first(&vdi_state_root);
 	}
 	INIT_RB_ROOT(&vdi_state_root);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_unlock(&vdi_state_lock);
 }
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index efd00ce..73e7419 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -62,7 +62,7 @@ struct vdi_inode {
 };
 
 static struct rb_root vdi_inode_tree = RB_ROOT;
-static pthread_rwlock_t vdi_inode_tree_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct sd_lock vdi_inode_tree_lock = SD_LOCK_INITIALIZER;
 
 static struct vdi_inode *vdi_inode_tree_insert(struct vdi_inode *new)
 {
@@ -147,9 +147,9 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
 	unsigned long idx = 0;
 	uint64_t cow_oid = 0;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_read_lock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 
 	if (is_data_obj(oid)) {
 		idx = data_oid_to_idx(oid);
@@ -291,9 +291,9 @@ static int volume_do_sync(uint32_t vid)
 	int ret, fd, idx;
 	struct vdi_inode *vdi;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_read_lock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 
 	hdr.opcode = SD_OP_FLUSH_VDI;
 	hdr.obj.oid = vid_to_vdi_oid(vid);
@@ -368,7 +368,7 @@ int reset_socket_pool(void)
 	struct vdi_inode *vdi;
 	int ret = 0;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_read_lock(&vdi_inode_tree_lock);
 	for (node = rb_first(&vdi_inode_tree); node; node = rb_next(node)) {
 		vdi = rb_entry(node, struct vdi_inode, rb);
 		destroy_socket_pool(vdi->socket_pool, SOCKET_POOL_SIZE);
@@ -379,7 +379,7 @@ int reset_socket_pool(void)
 		}
 	}
 out:
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 	return ret;
 }
 
@@ -414,9 +414,9 @@ static int init_vdi_info(const char *entry, uint32_t *vid, size_t *size)
 		goto err;
 	}
 	/* we need insert inode before calling volume_rw_object */
-	pthread_rwlock_wrlock(&vdi_inode_tree_lock);
+	sd_write_lock(&vdi_inode_tree_lock);
 	dummy = vdi_inode_tree_insert(inode);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 	if (dummy)
 		goto err;
 	if (volume_rw_object(inode_buf, vid_to_vdi_oid(*vid), SD_INODE_SIZE,
@@ -478,9 +478,9 @@ static int volume_sync_and_delete(uint32_t vid)
 	int ret, fd, idx;
 	struct vdi_inode *vdi;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_read_lock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 
 	hdr.opcode = SD_OP_FLUSH_DEL_CACHE;
 	hdr.obj.oid = vid_to_vdi_oid(vid);
@@ -517,14 +517,14 @@ int volume_remove_entry(const char *entry)
 	if (sheepfs_object_cache && volume_sync_and_delete(vid) < 0)
 		return -1;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_read_lock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 	destroy_socket_pool(vdi->socket_pool, SOCKET_POOL_SIZE);
 
-	pthread_rwlock_wrlock(&vdi_inode_tree_lock);
+	sd_write_lock(&vdi_inode_tree_lock);
 	rb_erase(&vdi->rb, &vdi_inode_tree);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_unlock(&vdi_inode_tree_lock);
 
 	free(vdi->inode);
 	free(vdi);
-- 
1.8.1.3.566.gaa39828