[sheepdog] [PATCH stable-0.8 3/6] sheep/cluster: let local_lock() support nested

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Tue Mar 18 07:30:56 CET 2014


From: Robin Dong <sanbai at taobao.com>

In kv_create_object(), we need to lock the bucket and then lock its allocator:

    lock(bucket_vid)
        lock(data_vid)
        unlock(data_vid)
    unlock(bucket_vid)

so the implementation of ->lock() in the 'local' cluster driver must support
nested locking.

A pthread_mutex_t with the PTHREAD_PROCESS_SHARED attribute can be used by
threads in different processes.
(ref: http://stackoverflow.com/questions/17809231/mutex-attribute-pthread-process-shared-inverts-logic,
http://royontechnology.blogspot.com/2007/06/comparison-of-pthreadprocessshared-in.html)

First, we create a 'global_lock' to protect lock_tree; then we can create new
'SHARED' locks as needed. Since each pthread_mutex_t is actually stored in a
file, we must check whether the file already exists before initializing the
mutex in the mmap'ed memory.

Signed-off-by: Robin Dong <sanbai at taobao.com>
Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 include/util.h        |  14 +++++++
 sheep/cluster/local.c | 109 +++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 105 insertions(+), 18 deletions(-)

diff --git a/include/util.h b/include/util.h
index e2bb90d..a1d8bac 100644
--- a/include/util.h
+++ b/include/util.h
@@ -286,6 +286,20 @@ static inline void sd_init_mutex(struct sd_mutex *mutex)
 		panic("failed to initialize a lock, %s", strerror(ret));
 }
 
+static inline void sd_init_mutex_attr(struct sd_mutex *mutex,
+				      pthread_mutexattr_t *attr)
+{
+	int ret;
+
+	do {
+		ret = pthread_mutex_init(&mutex->mutex, attr);
+	} while (ret == EAGAIN);
+
+	if (unlikely(ret != 0))
+		panic("failed to initialize a lock with attr, %s",
+		      strerror(ret));
+}
+
 static inline void sd_destroy_mutex(struct sd_mutex *mutex)
 {
 	int ret;
diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
index 7f9a6eb..e447822 100644
--- a/sheep/cluster/local.c
+++ b/sheep/cluster/local.c
@@ -30,17 +30,24 @@
 
 static const char *shmfile = "/tmp/sheepdog_shm";
 static const char *lockdir = "/tmp/sheepdog_locks/";
-/*
- * we have to use sd_rw_lock because flock isn't thread exclusive
- * and it also serves to project lock_tree
- */
-static struct sd_rw_lock lock_tree_lock = SD_RW_LOCK_INITIALIZER;
+
+/* use lock_tree to find lock quickly */
 static struct rb_root lock_tree_root = RB_ROOT;
 
+/* use global_lock to protect lock_tree */
+static struct sd_mutex *global_lock;
+
+/*
+ * a lock may be used by several processes(or threads) at the same time,
+ * so we should add 'ref' to avoid one process release a lock which
+ * still used by another process.
+ */
 struct lock_entry {
 	struct rb_node rb;
 	int fd;
 	uint64_t lock_id;
+	uint64_t ref;
+	struct sd_mutex *mutex;
 };
 
 static int shmfd;
@@ -539,10 +546,63 @@ static int local_get_local_addr(uint8_t *myaddr)
 	return 0;
 }
 
+/*
+ * pthread_mutex with attribute of PTHREAD_PROCESS_SHARED could be
+ * used by different threads in different processes.
+ * We put pthread_mutex_t in shared-memory so any process could easily
+ * get it.
+ */
+static struct sd_mutex *get_shared_lock(const char *path, int *fd)
+{
+	struct sd_mutex *pmutex;
+	pthread_mutexattr_t mutex_attr;
+	int ret, flags = O_RDWR;
+	bool created = false;
+
+	ret = access(path, R_OK|W_OK);
+	if (!ret)
+		created = true;
+	else if (errno != ENOENT)
+		panic("failed to access %s, %m", path);
+
+	if (!created)
+		flags |= O_CREAT;
+
+	*fd = open(path, flags, sd_def_fmode);
+	if (*fd < 0)
+		panic("failed to open %s, %m", path);
+
+	if (!created) {
+		ret = ftruncate(*fd, sizeof(pthread_mutex_t));
+		if (ret < 0)
+			panic("failed to ftruncate %s, %m", path);
+	}
+
+	pmutex = (struct sd_mutex *)mmap(NULL, sizeof(struct sd_mutex),
+					 PROT_READ|PROT_WRITE,
+					 MAP_SHARED, *fd, 0);
+	if (!pmutex)
+		panic("failed to mmap %s, %m", path);
+
+	if (!created) {
+		if (pthread_mutexattr_init(&mutex_attr))
+			panic("failed to init mutexattr, %m");
+
+		if (pthread_mutexattr_setpshared(&mutex_attr,
+						 PTHREAD_PROCESS_SHARED))
+			panic("failed to setpshared mutexattr, %m");
+
+		sd_init_mutex_attr(pmutex, &mutex_attr);
+	}
+
+	return pmutex;
+}
+
 static int local_init(const char *option)
 {
 	sigset_t mask;
-	int ret;
+	int ret, fd;
+	char path[PATH_MAX];
 	static struct timer t = {
 		.callback = check_pids,
 		.data = &t,
@@ -583,6 +643,9 @@ static int local_init(const char *option)
 		return -1;
 	}
 
+	snprintf(path, sizeof(path), "%s%s", lockdir, "global_lock");
+	global_lock = get_shared_lock(path, &fd);
+	sd_debug("create global_lock");
 	return 0;
 }
 
@@ -609,41 +672,51 @@ static void local_lock(uint64_t lock_id)
 {
 	struct lock_entry *entry;
 
-	sd_write_lock(&lock_tree_lock);
+	sd_mutex_lock(global_lock);
+
 	entry = lock_tree_lookup(lock_id);
 	if (!entry) {
 		char path[PATH_MAX];
 		int fd;
 
 		snprintf(path, sizeof(path), "%s%016"PRIx64, lockdir, lock_id);
-		fd = open(path, O_RDONLY | O_CREAT, sd_def_fmode);
-		if (fd < 0)
-			panic("failed to open %s, %m", path);
 		entry = xmalloc(sizeof(*entry));
 		entry->lock_id = lock_id;
+		entry->mutex = get_shared_lock(path, &fd);
 		entry->fd = fd;
+		entry->ref = 0;
 		lock_tree_add(entry);
 	}
 
-	if (xflock(entry->fd, LOCK_EX) < 0)
-		panic("lock failed %"PRIx64", %m", lock_id);
+	entry->ref++;
+
+	sd_mutex_unlock(global_lock);
+
+	sd_mutex_lock(entry->mutex);
 }
 
 static void local_unlock(uint64_t lock_id)
 {
 	struct lock_entry *entry;
 
+	sd_mutex_lock(global_lock);
+
 	entry = lock_tree_lookup(lock_id);
 	if (!entry)
 		panic("can't find fd for lock %"PRIx64, lock_id);
 
-	if (xflock(entry->fd, LOCK_UN) < 0)
-		panic("unlock failed %"PRIx64", %m", lock_id);
+	sd_mutex_unlock(entry->mutex);
+
+	entry->ref--;
+
+	if (!entry->ref) {
+		munmap(entry->mutex, sizeof(pthread_mutex_t));
+		close(entry->fd);
+		rb_erase(&entry->rb, &lock_tree_root);
+		free(entry);
+	}
 
-	close(entry->fd);
-	rb_erase(&entry->rb, &lock_tree_root);
-	free(entry);
-	sd_rw_unlock(&lock_tree_lock);
+	sd_mutex_unlock(global_lock);
 }
 
 static int local_update_node(struct sd_node *node)
-- 
1.8.1.2




More information about the sheepdog mailing list