[sheepdog] [PATCH] introduce wrapper functions for pthread_rwlock

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Wed Aug 7 16:46:32 CEST 2013


Currently, we don't check the return values of pthread_rwlock_rdlock()
and pthread_rwlock_wrlock().  If they return an error and we proceed
regardless, we can end up calling pthread_rwlock_unlock() on a lock
that another thread still holds.  This can lead to bugs that are very
difficult to track down.

These functions can actually return errors.  For example, if
pthread_rwlock_rdlock() is called while the calling thread already
owns the write lock, it returns EDEADLK without taking the lock.  It
is difficult to guarantee that this kind of deadlock never happens, so
checking the return values strictly is important to ensure our code is
correct.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/farm/farm.c        |  6 ++---
 include/util.h            | 56 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/sockfd_cache.c        | 36 +++++++++++++++---------------
 sheep/cluster/zookeeper.c | 32 +++++++++++++--------------
 sheep/md.c                | 42 +++++++++++++++++------------------
 sheep/object_cache.c      | 44 ++++++++++++++++++-------------------
 sheep/object_list_cache.c | 26 +++++++++++-----------
 sheep/vdi.c               | 22 +++++++++----------
 sheepfs/volume.c          | 30 ++++++++++++-------------
 9 files changed, 175 insertions(+), 119 deletions(-)

diff --git a/collie/farm/farm.c b/collie/farm/farm.c
index 2a4e307..3da9f8d 100644
--- a/collie/farm/farm.c
+++ b/collie/farm/farm.c
@@ -21,7 +21,7 @@
 static char farm_object_dir[PATH_MAX];
 static char farm_dir[PATH_MAX];
 
-static pthread_rwlock_t vdi_list_lock = PTHREAD_RWLOCK_INITIALIZER;
+static sd_mutex_t vdi_list_lock = SD_MUTEX_INITIALIZER;
 struct vdi_entry {
 	char name[SD_MAX_VDI_LEN];
 	uint64_t vdi_size;
@@ -350,9 +350,9 @@ static void do_load_object(struct work *work)
 				   sw->entry.nr_copies) < 0)
 			goto error;
 
-		pthread_rwlock_wrlock(&vdi_list_lock);
+		sd_mutex_wrlock(&vdi_list_lock);
 		insert_vdi(buffer);
-		pthread_rwlock_unlock(&vdi_list_lock);
+		sd_mutex_unlock(&vdi_list_lock);
 	}
 
 	farm_show_progress(uatomic_add_return(&loaded, 1), trunk_get_count());
diff --git a/include/util.h b/include/util.h
index 0d50f4f..bfbe451 100644
--- a/include/util.h
+++ b/include/util.h
@@ -11,6 +11,8 @@
 #include <search.h>
 #include <urcu/uatomic.h>
 #include <sys/eventfd.h>
+#include <pthread.h>
+#include <errno.h>
 
 #include "logger.h"
 #include "bitops.h"
@@ -257,6 +259,60 @@ static inline int refcount_dec(refcnt_t *rc)
 	return uatomic_sub_return(&rc->val, 1);
 }
 
+/* wrapper for pthread_rwlock */
+
+#define SD_MUTEX_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
+
+typedef pthread_rwlock_t sd_mutex_t;
+
+static inline void sd_mutex_init(sd_mutex_t *mutex)
+{
+	int ret;
+
+	do {
+		ret = pthread_rwlock_init(mutex, NULL);
+	} while (ret == EAGAIN);
+
+	if (ret != 0)
+		panic("failed to initialize a mutex, %s", strerror(ret));
+}
+
+static inline void sd_mutex_destroy(sd_mutex_t *mutex)
+{
+	int ret = pthread_rwlock_destroy(mutex);
+
+	if (ret != 0)
+		panic("failed to destroy a mutex, %s", strerror(ret));
+}
+
+static inline void sd_mutex_rdlock(sd_mutex_t *mutex)
+{
+	int ret;
+
+	do {
+		ret = pthread_rwlock_rdlock(mutex);
+	} while (ret == EAGAIN);
+
+	if (ret != 0)
+		panic("failed to lock for reading, %s", strerror(ret));
+}
+
+static inline void sd_mutex_wrlock(sd_mutex_t *mutex)
+{
+	int ret = pthread_rwlock_wrlock(mutex);
+
+	if (ret != 0)
+		panic("failed to lock for writing, %s", strerror(ret));
+}
+
+static inline void sd_mutex_unlock(sd_mutex_t *mutex)
+{
+	int ret = pthread_rwlock_unlock(mutex);
+
+	if (ret != 0)
+		panic("failed to unlock, %s", strerror(ret));
+}
+
 /* colors */
 #define TEXT_NORMAL         "\033[0m"
 #define TEXT_BOLD           "\033[1m"
diff --git a/lib/sockfd_cache.c b/lib/sockfd_cache.c
index f4efd1b..1458867 100644
--- a/lib/sockfd_cache.c
+++ b/lib/sockfd_cache.c
@@ -37,13 +37,13 @@
 
 struct sockfd_cache {
 	struct rb_root root;
-	pthread_rwlock_t lock;
+	sd_mutex_t lock;
 	int count;
 };
 
 static struct sockfd_cache sockfd_cache = {
 	.root = RB_ROOT,
-	.lock = PTHREAD_RWLOCK_INITIALIZER,
+	.lock = SD_MUTEX_INITIALIZER,
 };
 
 /*
@@ -144,7 +144,7 @@ static struct sockfd_cache_entry *sockfd_cache_grab(const struct node_id *nid,
 {
 	struct sockfd_cache_entry *entry;
 
-	pthread_rwlock_rdlock(&sockfd_cache.lock);
+	sd_mutex_rdlock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (!entry) {
 		char name[INET6_ADDRSTRLEN];
@@ -158,7 +158,7 @@ static struct sockfd_cache_entry *sockfd_cache_grab(const struct node_id *nid,
 	if (*ret_idx == -1)
 		entry = NULL;
 out:
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 	return entry;
 }
 
@@ -196,7 +196,7 @@ static bool sockfd_cache_destroy(const struct node_id *nid)
 {
 	struct sockfd_cache_entry *entry;
 
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_mutex_wrlock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (!entry) {
 		sd_dprintf("It is already destroyed");
@@ -209,14 +209,14 @@ static bool sockfd_cache_destroy(const struct node_id *nid)
 	}
 
 	rb_erase(&entry->rb, &sockfd_cache.root);
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 
 	destroy_all_slots(entry);
 	free_cache_entry(entry);
 
 	return true;
 false_out:
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 	return false;
 }
 
@@ -243,12 +243,12 @@ void sockfd_cache_add_group(const struct sd_node *nodes, int nr)
 	const struct sd_node *p;
 
 	sd_dprintf("%d", nr);
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_mutex_wrlock(&sockfd_cache.lock);
 	while (nr--) {
 		p = nodes + nr;
 		sockfd_cache_add_nolock(&p->nid);
 	}
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 }
 
 /* Add one node to the cache means we can do caching tricks on this node */
@@ -258,7 +258,7 @@ void sockfd_cache_add(const struct node_id *nid)
 	char name[INET6_ADDRSTRLEN];
 	int n, i;
 
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_mutex_wrlock(&sockfd_cache.lock);
 	new = xmalloc(sizeof(*new));
 	new->fds = xzalloc(sizeof(struct sockfd_cache_fd) * fds_count);
 	for (i = 0; i < fds_count; i++)
@@ -267,10 +267,10 @@ void sockfd_cache_add(const struct node_id *nid)
 	memcpy(&new->nid, nid, sizeof(struct node_id));
 	if (sockfd_cache_insert(new)) {
 		free_cache_entry(new);
-		pthread_rwlock_unlock(&sockfd_cache.lock);
+		sd_mutex_unlock(&sockfd_cache.lock);
 		return;
 	}
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 	n = uatomic_add_return(&sockfd_cache.count, 1);
 	addr_to_str(name, sizeof(name), nid->addr, 0);
 	sd_dprintf("%s:%d, count %d", name, nid->port, n);
@@ -288,7 +288,7 @@ static void do_grow_fds(struct work *work)
 	int old_fds_count, new_fds_count, new_size, i;
 
 	sd_dprintf("%d", fds_count);
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_mutex_wrlock(&sockfd_cache.lock);
 	old_fds_count = fds_count;
 	new_fds_count = fds_count * 2;
 	new_size = sizeof(struct sockfd_cache_fd) * fds_count * 2;
@@ -303,7 +303,7 @@ static void do_grow_fds(struct work *work)
 
 	fds_count *= 2;
 	fds_high_watermark = FDS_WATERMARK(fds_count);
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 }
 
 static void grow_fds_done(struct work *work)
@@ -414,11 +414,11 @@ static void sockfd_cache_put_long(const struct node_id *nid, int idx)
 	addr_to_str(name, sizeof(name), addr, 0);
 	sd_dprintf("%s:%d idx %d", name, port, idx);
 
-	pthread_rwlock_rdlock(&sockfd_cache.lock);
+	sd_mutex_rdlock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (entry)
 		uatomic_set_false(&entry->fds[idx].in_use);
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 }
 
 static void sockfd_cache_close(const struct node_id *nid, int idx)
@@ -432,14 +432,14 @@ static void sockfd_cache_close(const struct node_id *nid, int idx)
 	addr_to_str(name, sizeof(name), addr, 0);
 	sd_dprintf("%s:%d idx %d", name, port, idx);
 
-	pthread_rwlock_wrlock(&sockfd_cache.lock);
+	sd_mutex_wrlock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (entry) {
 		close(entry->fds[idx].fd);
 		entry->fds[idx].fd = -1;
 		uatomic_set_false(&entry->fds[idx].in_use);
 	}
-	pthread_rwlock_unlock(&sockfd_cache.lock);
+	sd_mutex_unlock(&sockfd_cache.lock);
 }
 
 /*
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 6e632fc..97499cf 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -71,8 +71,8 @@ struct zk_event {
 static struct sd_node sd_nodes[SD_MAX_NODES];
 static size_t nr_sd_nodes;
 static struct rb_root zk_node_root = RB_ROOT;
-static pthread_rwlock_t zk_tree_lock = PTHREAD_RWLOCK_INITIALIZER;
-static pthread_rwlock_t zk_compete_master_lock = PTHREAD_RWLOCK_INITIALIZER;
+static sd_mutex_t zk_tree_lock = SD_MUTEX_INITIALIZER;
+static sd_mutex_t zk_compete_master_lock = SD_MUTEX_INITIALIZER;
 static LIST_HEAD(zk_block_list);
 static uatomic_bool is_master;
 static uatomic_bool stop;
@@ -132,9 +132,9 @@ static inline struct zk_node *zk_tree_search(const struct node_id *nid)
 {
 	struct zk_node *n;
 
-	pthread_rwlock_rdlock(&zk_tree_lock);
+	sd_mutex_rdlock(&zk_tree_lock);
 	n = zk_tree_search_nolock(nid);
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_mutex_unlock(&zk_tree_lock);
 	return n;
 }
 
@@ -439,7 +439,7 @@ static inline void zk_tree_add(struct zk_node *node)
 {
 	struct zk_node *zk = xzalloc(sizeof(*zk));
 	*zk = *node;
-	pthread_rwlock_wrlock(&zk_tree_lock);
+	sd_mutex_wrlock(&zk_tree_lock);
 	if (zk_tree_insert(zk)) {
 		free(zk);
 		goto out;
@@ -450,7 +450,7 @@ static inline void zk_tree_add(struct zk_node *node)
 	 */
 	sd_nodes[nr_sd_nodes++] = zk->node;
 out:
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_mutex_unlock(&zk_tree_lock);
 }
 
 static inline void zk_tree_del_nolock(struct zk_node *node)
@@ -461,9 +461,9 @@ static inline void zk_tree_del_nolock(struct zk_node *node)
 
 static inline void zk_tree_del(struct zk_node *node)
 {
-	pthread_rwlock_wrlock(&zk_tree_lock);
+	sd_mutex_wrlock(&zk_tree_lock);
 	zk_tree_del_nolock(node);
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_mutex_unlock(&zk_tree_lock);
 }
 
 static inline void zk_tree_destroy(void)
@@ -471,13 +471,13 @@ static inline void zk_tree_destroy(void)
 	struct zk_node *zk;
 	int i;
 
-	pthread_rwlock_wrlock(&zk_tree_lock);
+	sd_mutex_wrlock(&zk_tree_lock);
 	for (i = 0; i < nr_sd_nodes; i++) {
 		zk = zk_tree_search_nolock(&sd_nodes[i].nid);
 		if (zk)
 			zk_tree_del_nolock(zk);
 	}
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_mutex_unlock(&zk_tree_lock);
 }
 
 static inline void build_node_list(void)
@@ -575,11 +575,11 @@ static void zk_watcher(zhandle_t *zh, int type, int state, const char *path,
 		p++;
 		str_to_node(p, &znode.node);
 		/* FIXME: remove redundant leave events */
-		pthread_rwlock_rdlock(&zk_tree_lock);
+		sd_mutex_rdlock(&zk_tree_lock);
 		n = zk_tree_search_nolock(&znode.node.nid);
 		if (n)
 			n->gone = true;
-		pthread_rwlock_unlock(&zk_tree_lock);
+		sd_mutex_unlock(&zk_tree_lock);
 		if (n)
 			add_event(EVENT_LEAVE, &znode, NULL, 0);
 	}
@@ -730,7 +730,7 @@ static void zk_compete_master(void)
 	 * This is to protect master_seq and my_seq because this function will
 	 * be called by both main thread and zookeeper's event thread.
 	 */
-	pthread_rwlock_wrlock(&zk_compete_master_lock);
+	sd_mutex_wrlock(&zk_compete_master_lock);
 
 	if (uatomic_is_true(&is_master) || uatomic_is_true(&stop))
 		goto out_unlock;
@@ -782,7 +782,7 @@ success:
 	uatomic_set_true(&is_master);
 	sd_dprintf("success");
 out_unlock:
-	pthread_rwlock_unlock(&zk_compete_master_lock);
+	sd_mutex_unlock(&zk_compete_master_lock);
 }
 
 static int zk_join(const struct sd_node *myself,
@@ -991,12 +991,12 @@ static void zk_handle_update_node(struct zk_event *ev)
 	if (node_eq(snode, &this_node.node))
 		this_node.node = *snode;
 
-	pthread_rwlock_rdlock(&zk_tree_lock);
+	sd_mutex_rdlock(&zk_tree_lock);
 	t = zk_tree_search_nolock(&snode->nid);
 	assert(t);
 	t->node = *snode;
 	build_node_list();
-	pthread_rwlock_unlock(&zk_tree_lock);
+	sd_mutex_unlock(&zk_tree_lock);
 	sd_update_node_handler(snode);
 }
 
diff --git a/sheep/md.c b/sheep/md.c
index 6934850..2facd74 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -30,7 +30,7 @@ struct vdisk {
 static struct disk md_disks[MD_MAX_DISK];
 static struct vdisk md_vds[MD_MAX_VDISK];
 
-static pthread_rwlock_t md_lock = PTHREAD_RWLOCK_INITIALIZER;
+static sd_mutex_t md_lock = SD_MUTEX_INITIALIZER;
 static int md_nr_disks; /* Protected by md_lock */
 static int md_nr_vds;
 
@@ -38,9 +38,9 @@ static inline int nr_online_disks(void)
 {
 	int nr;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	nr = md_nr_disks;
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 
 	return nr;
 }
@@ -337,10 +337,10 @@ char *md_get_object_path(uint64_t oid)
 	struct vdisk *vd;
 	char *p;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	vd = oid_to_vdisk(oid);
 	p = md_disks[vd->idx].path;
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 	sd_dprintf("%d, %s", vd->idx, p);
 
 	return p;
@@ -360,14 +360,14 @@ int for_each_object_in_wd(int (*func)(uint64_t oid, char *path, uint32_t epoch,
 {
 	int i, ret = SD_RES_SUCCESS;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		ret = for_each_object_in_path(md_disks[i].path, func,
 					      cleanup, arg);
 		if (ret != SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 	return ret;
 }
 
@@ -378,7 +378,7 @@ int for_each_object_in_stale(int (*func)(uint64_t oid, char *path,
 	int i, ret = SD_RES_SUCCESS;
 	char path[PATH_MAX];
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		snprintf(path, sizeof(path), "%s/.stale", md_disks[i].path);
 		sd_eprintf("%s", path);
@@ -386,7 +386,7 @@ int for_each_object_in_stale(int (*func)(uint64_t oid, char *path,
 		if (ret != SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 	return ret;
 }
 
@@ -395,13 +395,13 @@ int for_each_obj_path(int (*func)(char *path))
 {
 	int i, ret = SD_RES_SUCCESS;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		ret = func(md_disks[i].path);
 		if (ret != SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 	return ret;
 }
 
@@ -423,7 +423,7 @@ static void md_do_recover(struct work *work)
 	struct md_work *mw = container_of(work, struct md_work, work);
 	int idx, nr = 0;
 
-	pthread_rwlock_wrlock(&md_lock);
+	sd_mutex_wrlock(&md_lock);
 	idx = path_to_disk_idx(mw->path);
 	if (idx < 0)
 		/* Just ignore the duplicate EIO of the same path */
@@ -432,7 +432,7 @@ static void md_do_recover(struct work *work)
 	md_init_space();
 	nr = md_nr_disks;
 out:
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 
 	if (nr > 0)
 		kick_recover();
@@ -549,13 +549,13 @@ static int scan_wd(uint64_t oid, uint32_t epoch)
 {
 	int i, ret = SD_RES_EIO;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		ret = md_check_and_move(oid, epoch, md_disks[i].path);
 		if (ret == SD_RES_SUCCESS)
 			break;
 	}
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 	return ret;
 }
 
@@ -598,7 +598,7 @@ uint32_t md_get_info(struct sd_md_info *info)
 	int i;
 
 	memset(info, 0, ret);
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	for (i = 0; i < md_nr_disks; i++) {
 		info->disk[i].idx = i;
 		pstrcpy(info->disk[i].path, PATH_MAX, md_disks[i].path);
@@ -607,7 +607,7 @@ uint32_t md_get_info(struct sd_md_info *info)
 							&info->disk[i].used);
 	}
 	info->nr = md_nr_disks;
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 	return ret;
 }
 
@@ -627,7 +627,7 @@ static int do_plug_unplug(char *disks, bool plug)
 	char *path;
 	int old_nr, cur_nr = 0, ret = SD_RES_UNKNOWN;
 
-	pthread_rwlock_wrlock(&md_lock);
+	sd_mutex_wrlock(&md_lock);
 	old_nr = md_nr_disks;
 	path = strtok(disks, ",");
 	do {
@@ -648,7 +648,7 @@ static int do_plug_unplug(char *disks, bool plug)
 
 	ret = SD_RES_SUCCESS;
 out:
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 
 	/*
 	 * We have to kick recover aggressively because there is possibility
@@ -676,10 +676,10 @@ uint64_t md_get_size(uint64_t *used)
 	uint64_t fsize = 0;
 	*used = 0;
 
-	pthread_rwlock_rdlock(&md_lock);
+	sd_mutex_rdlock(&md_lock);
 	for (int i = 0; i < md_nr_disks; i++)
 		fsize += get_path_free_size(md_disks[i].path, used);
-	pthread_rwlock_unlock(&md_lock);
+	sd_mutex_unlock(&md_lock);
 
 	return fsize + *used;
 }
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index dcf6972..4e222b4 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -49,7 +49,7 @@ struct object_cache_entry {
 	struct list_head dirty_list; /* For dirty list of object cache */
 	struct list_head lru_list; /* For lru list of object cache */
 
-	pthread_rwlock_t lock; /* Entry lock */
+	sd_mutex_t lock; /* Entry lock */
 };
 
 struct object_cache {
@@ -63,7 +63,7 @@ struct object_cache {
 	int push_efd; /* Used to synchronize between pusher and push threads */
 	uatomic_bool in_push; /* Whether if pusher is running */
 
-	pthread_rwlock_t lock; /* Cache lock */
+	sd_mutex_t lock; /* Cache lock */
 };
 
 struct push_work {
@@ -79,8 +79,8 @@ static int def_open_flags = O_RDWR;
 #define HASH_BITS	5
 #define HASH_SIZE	(1 << HASH_BITS)
 
-static pthread_rwlock_t hashtable_lock[HASH_SIZE] = {
-	[0 ... HASH_SIZE - 1] = PTHREAD_RWLOCK_INITIALIZER
+static sd_mutex_t hashtable_lock[HASH_SIZE] = {
+	[0 ... HASH_SIZE - 1] = SD_MUTEX_INITIALIZER
 };
 
 static struct hlist_head cache_hashtable[HASH_SIZE];
@@ -160,32 +160,32 @@ static inline bool entry_in_use(struct object_cache_entry *entry)
  */
 static inline void read_lock_cache(struct object_cache *oc)
 {
-	pthread_rwlock_rdlock(&oc->lock);
+	sd_mutex_rdlock(&oc->lock);
 }
 
 static inline void write_lock_cache(struct object_cache *oc)
 {
-	pthread_rwlock_wrlock(&oc->lock);
+	sd_mutex_wrlock(&oc->lock);
 }
 
 static inline void unlock_cache(struct object_cache *oc)
 {
-	pthread_rwlock_unlock(&oc->lock);
+	sd_mutex_unlock(&oc->lock);
 }
 
 static inline void read_lock_entry(struct object_cache_entry *entry)
 {
-	pthread_rwlock_rdlock(&entry->lock);
+	sd_mutex_rdlock(&entry->lock);
 }
 
 static inline void write_lock_entry(struct object_cache_entry *entry)
 {
-	pthread_rwlock_wrlock(&entry->lock);
+	sd_mutex_wrlock(&entry->lock);
 }
 
 static inline void unlock_entry(struct object_cache_entry *entry)
 {
-	pthread_rwlock_unlock(&entry->lock);
+	sd_mutex_unlock(&entry->lock);
 }
 
 static struct object_cache_entry *
@@ -292,7 +292,7 @@ free_cache_entry(struct object_cache_entry *entry)
 	list_del_init(&entry->lru_list);
 	if (!list_empty(&entry->dirty_list))
 		del_from_dirty_list(entry);
-	pthread_rwlock_destroy(&entry->lock);
+	sd_mutex_destroy(&entry->lock);
 	free(entry);
 }
 
@@ -589,19 +589,19 @@ static void do_reclaim(struct work *work)
 		int idx = (i + j) % HASH_SIZE;
 		struct hlist_head *head = cache_hashtable + idx;
 
-		pthread_rwlock_rdlock(&hashtable_lock[idx]);
+		sd_mutex_rdlock(&hashtable_lock[idx]);
 		hlist_for_each_entry(cache, node, head, hash) {
 			uint32_t cap;
 
 			do_reclaim_object(cache);
 			cap = uatomic_read(&gcache.capacity);
 			if (cap <= HIGH_WATERMARK) {
-				pthread_rwlock_unlock(&hashtable_lock[idx]);
+				sd_mutex_unlock(&hashtable_lock[idx]);
 				sd_dprintf("complete, capacity %"PRIu32, cap);
 				return;
 			}
 		}
-		pthread_rwlock_unlock(&hashtable_lock[idx]);
+		sd_mutex_unlock(&hashtable_lock[idx]);
 	}
 	sd_dprintf("finished");
 }
@@ -634,9 +634,9 @@ static struct object_cache *find_object_cache(uint32_t vid, bool create)
 	struct hlist_node *node;
 
 	if (create)
-		pthread_rwlock_wrlock(&hashtable_lock[h]);
+		sd_mutex_wrlock(&hashtable_lock[h]);
 	else
-		pthread_rwlock_rdlock(&hashtable_lock[h]);
+		sd_mutex_rdlock(&hashtable_lock[h]);
 
 	if (hlist_empty(head))
 		goto not_found;
@@ -656,13 +656,13 @@ not_found:
 		INIT_LIST_HEAD(&cache->dirty_head);
 		INIT_LIST_HEAD(&cache->lru_head);
 
-		pthread_rwlock_init(&cache->lock, NULL);
+		sd_mutex_init(&cache->lock);
 		hlist_add_head(&cache->hash, head);
 	} else {
 		cache = NULL;
 	}
 out:
-	pthread_rwlock_unlock(&hashtable_lock[h]);
+	sd_mutex_unlock(&hashtable_lock[h]);
 	return cache;
 }
 
@@ -695,7 +695,7 @@ alloc_cache_entry(struct object_cache *oc, uint32_t idx)
 	entry = xzalloc(sizeof(*entry));
 	entry->oc = oc;
 	entry->idx = idx;
-	pthread_rwlock_init(&entry->lock, NULL);
+	sd_mutex_init(&entry->lock);
 	INIT_LIST_HEAD(&entry->dirty_list);
 	INIT_LIST_HEAD(&entry->lru_list);
 
@@ -977,9 +977,9 @@ void object_cache_delete(uint32_t vid)
 		return;
 
 	/* Firstly we free memeory */
-	pthread_rwlock_wrlock(&hashtable_lock[h]);
+	sd_mutex_wrlock(&hashtable_lock[h]);
 	hlist_del(&cache->hash);
-	pthread_rwlock_unlock(&hashtable_lock[h]);
+	sd_mutex_unlock(&hashtable_lock[h]);
 
 	write_lock_cache(cache);
 	list_for_each_entry_safe(entry, t, &cache->lru_head, lru_list) {
@@ -987,7 +987,7 @@ void object_cache_delete(uint32_t vid)
 		uatomic_sub(&gcache.capacity, CACHE_OBJECT_SIZE);
 	}
 	unlock_cache(cache);
-	pthread_rwlock_destroy(&cache->lock);
+	sd_mutex_destroy(&cache->lock);
 	close(cache->push_efd);
 	free(cache);
 
diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
index 6d5139f..a88c588 100644
--- a/sheep/object_list_cache.c
+++ b/sheep/object_list_cache.c
@@ -26,7 +26,7 @@ struct objlist_cache {
 	uint64_t *buf;
 	struct list_head entry_list;
 	struct rb_root root;
-	pthread_rwlock_t lock;
+	sd_mutex_t lock;
 };
 
 struct objlist_deletion_work {
@@ -38,7 +38,7 @@ static struct objlist_cache obj_list_cache = {
 	.tree_version	= 1,
 	.root		= RB_ROOT,
 	.entry_list     = LIST_HEAD_INIT(obj_list_cache.entry_list),
-	.lock		= PTHREAD_RWLOCK_INITIALIZER,
+	.lock		= SD_MUTEX_INITIALIZER,
 };
 
 static struct objlist_cache_entry *objlist_cache_rb_insert(struct rb_root *root,
@@ -92,12 +92,12 @@ static int objlist_cache_rb_remove(struct rb_root *root, uint64_t oid)
 
 void objlist_cache_remove(uint64_t oid)
 {
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_mutex_wrlock(&obj_list_cache.lock);
 	if (!objlist_cache_rb_remove(&obj_list_cache.root, oid)) {
 		obj_list_cache.cache_size--;
 		obj_list_cache.tree_version++;
 	}
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_mutex_unlock(&obj_list_cache.lock);
 }
 
 int objlist_cache_insert(uint64_t oid)
@@ -108,7 +108,7 @@ int objlist_cache_insert(uint64_t oid)
 	entry->oid = oid;
 	rb_init_node(&entry->node);
 
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_mutex_wrlock(&obj_list_cache.lock);
 	p = objlist_cache_rb_insert(&obj_list_cache.root, entry);
 	if (p)
 		free(entry);
@@ -117,7 +117,7 @@ int objlist_cache_insert(uint64_t oid)
 		obj_list_cache.cache_size++;
 		obj_list_cache.tree_version++;
 	}
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_mutex_unlock(&obj_list_cache.lock);
 
 	return 0;
 }
@@ -128,13 +128,13 @@ int get_obj_list(const struct sd_req *hdr, struct sd_rsp *rsp, void *data)
 	struct objlist_cache_entry *entry;
 
 	/* first try getting the cached buffer with only a read lock held */
-	pthread_rwlock_rdlock(&obj_list_cache.lock);
+	sd_mutex_rdlock(&obj_list_cache.lock);
 	if (obj_list_cache.tree_version == obj_list_cache.buf_version)
 		goto out;
 
 	/* if that fails grab a write lock for the usually nessecary update */
-	pthread_rwlock_unlock(&obj_list_cache.lock);
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_mutex_unlock(&obj_list_cache.lock);
+	sd_mutex_wrlock(&obj_list_cache.lock);
 	if (obj_list_cache.tree_version == obj_list_cache.buf_version)
 		goto out;
 
@@ -148,14 +148,14 @@ int get_obj_list(const struct sd_req *hdr, struct sd_rsp *rsp, void *data)
 
 out:
 	if (hdr->data_length < obj_list_cache.cache_size * sizeof(uint64_t)) {
-		pthread_rwlock_unlock(&obj_list_cache.lock);
+		sd_mutex_unlock(&obj_list_cache.lock);
 		sd_eprintf("GET_OBJ_LIST buffer too small");
 		return SD_RES_BUFFER_SMALL;
 	}
 
 	rsp->data_length = obj_list_cache.cache_size * sizeof(uint64_t);
 	memcpy(data, obj_list_cache.buf, rsp->data_length);
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_mutex_unlock(&obj_list_cache.lock);
 	return SD_RES_SUCCESS;
 }
 
@@ -179,7 +179,7 @@ static void objlist_deletion_work(struct work *work)
 		return;
 	}
 
-	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	sd_mutex_wrlock(&obj_list_cache.lock);
 	list_for_each_entry_safe(entry, t, &obj_list_cache.entry_list, list) {
 		entry_vid = oid_to_vid(entry->oid);
 		if (entry_vid != vid)
@@ -189,7 +189,7 @@ static void objlist_deletion_work(struct work *work)
 		rb_erase(&entry->node, &obj_list_cache.root);
 		free(entry);
 	}
-	pthread_rwlock_unlock(&obj_list_cache.lock);
+	sd_mutex_unlock(&obj_list_cache.lock);
 }
 
 static void objlist_deletion_done(struct work *work)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 6839bba..ff33558 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -20,7 +20,7 @@ struct vdi_state_entry {
 
 static uint32_t max_copies;
 static struct rb_root vdi_state_root = RB_ROOT;
-static pthread_rwlock_t vdi_state_lock = PTHREAD_RWLOCK_INITIALIZER;
+static sd_mutex_t vdi_state_lock = SD_MUTEX_INITIALIZER;
 
 static struct vdi_state_entry *vdi_state_search(struct rb_root *root,
 						uint32_t vid)
@@ -70,9 +70,9 @@ static bool vid_is_snapshot(uint32_t vid)
 {
 	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_state_lock);
+	sd_mutex_rdlock(&vdi_state_lock);
 	entry = vdi_state_search(&vdi_state_root, vid);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_mutex_unlock(&vdi_state_lock);
 
 	if (!entry) {
 		sd_eprintf("No VDI entry for %" PRIx32 " found", vid);
@@ -95,9 +95,9 @@ int get_vdi_copy_number(uint32_t vid)
 {
 	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_state_lock);
+	sd_mutex_rdlock(&vdi_state_lock);
 	entry = vdi_state_search(&vdi_state_root, vid);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_mutex_unlock(&vdi_state_lock);
 
 	if (!entry) {
 		sd_eprintf("No VDI copy entry for %" PRIx32 " found", vid);
@@ -145,7 +145,7 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot)
 
 	sd_dprintf("%" PRIx32 ", %d", vid, nr_copies);
 
-	pthread_rwlock_wrlock(&vdi_state_lock);
+	sd_mutex_wrlock(&vdi_state_lock);
 	old = vdi_state_insert(&vdi_state_root, entry);
 	if (old) {
 		free(entry);
@@ -157,7 +157,7 @@ int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot)
 	if (uatomic_read(&max_copies) == 0 ||
 	    nr_copies > uatomic_read(&max_copies))
 		uatomic_set(&max_copies, nr_copies);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_mutex_unlock(&vdi_state_lock);
 
 	return SD_RES_SUCCESS;
 }
@@ -169,7 +169,7 @@ int fill_vdi_state_list(void *data)
 	struct vdi_state *vs = data;
 	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_state_lock);
+	sd_mutex_rdlock(&vdi_state_lock);
 	for (n = rb_first(&vdi_state_root); n; n = rb_next(n)) {
 		entry = rb_entry(n, struct vdi_state_entry, node);
 		memset(vs, 0, sizeof(*vs));
@@ -179,7 +179,7 @@ int fill_vdi_state_list(void *data)
 		vs++;
 		nr++;
 	}
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_mutex_unlock(&vdi_state_lock);
 
 	return nr * sizeof(*vs);
 }
@@ -953,7 +953,7 @@ void clean_vdi_state(void)
 	struct rb_node *current_node = rb_first(&vdi_state_root);
 	struct vdi_state_entry *entry = NULL;
 
-	pthread_rwlock_wrlock(&vdi_state_lock);
+	sd_mutex_wrlock(&vdi_state_lock);
 	while (current_node) {
 		entry = rb_entry(current_node, struct vdi_state_entry, node);
 		rb_erase(current_node, &vdi_state_root);
@@ -962,5 +962,5 @@ void clean_vdi_state(void)
 		current_node = rb_first(&vdi_state_root);
 	}
 	INIT_RB_ROOT(&vdi_state_root);
-	pthread_rwlock_unlock(&vdi_state_lock);
+	sd_mutex_unlock(&vdi_state_lock);
 }
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index efd00ce..ce04b3a 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -62,7 +62,7 @@ struct vdi_inode {
 };
 
 static struct rb_root vdi_inode_tree = RB_ROOT;
-static pthread_rwlock_t vdi_inode_tree_lock = PTHREAD_RWLOCK_INITIALIZER;
+static sd_mutex_t vdi_inode_tree_lock = SD_MUTEX_INITIALIZER;
 
 static struct vdi_inode *vdi_inode_tree_insert(struct vdi_inode *new)
 {
@@ -147,9 +147,9 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
 	unsigned long idx = 0;
 	uint64_t cow_oid = 0;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_mutex_rdlock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 
 	if (is_data_obj(oid)) {
 		idx = data_oid_to_idx(oid);
@@ -291,9 +291,9 @@ static int volume_do_sync(uint32_t vid)
 	int ret, fd, idx;
 	struct vdi_inode *vdi;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_mutex_rdlock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 
 	hdr.opcode = SD_OP_FLUSH_VDI;
 	hdr.obj.oid = vid_to_vdi_oid(vid);
@@ -368,7 +368,7 @@ int reset_socket_pool(void)
 	struct vdi_inode *vdi;
 	int ret = 0;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_mutex_rdlock(&vdi_inode_tree_lock);
 	for (node = rb_first(&vdi_inode_tree); node; node = rb_next(node)) {
 		vdi = rb_entry(node, struct vdi_inode, rb);
 		destroy_socket_pool(vdi->socket_pool, SOCKET_POOL_SIZE);
@@ -379,7 +379,7 @@ int reset_socket_pool(void)
 		}
 	}
 out:
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 	return ret;
 }
 
@@ -414,9 +414,9 @@ static int init_vdi_info(const char *entry, uint32_t *vid, size_t *size)
 		goto err;
 	}
 	/* we need insert inode before calling volume_rw_object */
-	pthread_rwlock_wrlock(&vdi_inode_tree_lock);
+	sd_mutex_wrlock(&vdi_inode_tree_lock);
 	dummy = vdi_inode_tree_insert(inode);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 	if (dummy)
 		goto err;
 	if (volume_rw_object(inode_buf, vid_to_vdi_oid(*vid), SD_INODE_SIZE,
@@ -478,9 +478,9 @@ static int volume_sync_and_delete(uint32_t vid)
 	int ret, fd, idx;
 	struct vdi_inode *vdi;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_mutex_rdlock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 
 	hdr.opcode = SD_OP_FLUSH_DEL_CACHE;
 	hdr.obj.oid = vid_to_vdi_oid(vid);
@@ -517,14 +517,14 @@ int volume_remove_entry(const char *entry)
 	if (sheepfs_object_cache && volume_sync_and_delete(vid) < 0)
 		return -1;
 
-	pthread_rwlock_rdlock(&vdi_inode_tree_lock);
+	sd_mutex_rdlock(&vdi_inode_tree_lock);
 	vdi = vdi_inode_tree_search(vid);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 	destroy_socket_pool(vdi->socket_pool, SOCKET_POOL_SIZE);
 
-	pthread_rwlock_wrlock(&vdi_inode_tree_lock);
+	sd_mutex_wrlock(&vdi_inode_tree_lock);
 	rb_erase(&vdi->rb, &vdi_inode_tree);
-	pthread_rwlock_unlock(&vdi_inode_tree_lock);
+	sd_mutex_unlock(&vdi_inode_tree_lock);
 
 	free(vdi->inode);
 	free(vdi);
-- 
1.8.1.3.566.gaa39828



