[sheepdog] [PATCH v2] sheep: implement a correct mutex of base directory

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Tue Apr 2 07:24:02 CEST 2013


sheep currently employs lockf() as the mutex in lock_base_dir(). But
lockf() is not suitable for sheep because sheep calls daemon(3) after
the lockf(). daemon(3) forks internally and the parent process exits
immediately. In sheep's case, daemon() must be called after locking
the base dir, so the lock owner (the parent process) exits and the lock
is released even though the child process is still running. This is the
reason the current lock_base_dir() doesn't work well.

This patch makes lock_base_dir() correct with a new mechanism based on
O_EXCL of open(2).

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
v2: call exit_handler() from crash_handler() too

 sheep/sheep.c      |   12 ++++++++++++
 sheep/sheep_priv.h |    1 +
 sheep/store.c      |   37 +++++++++++++++++++++----------------
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/sheep/sheep.c b/sheep/sheep.c
index 4fa7d58..aee63fa 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -175,6 +175,11 @@ static int init_signal(void)
 	return 0;
 }
 
+static void exit_handler(void)
+{
+	unlock_base_dir();
+}
+
 static void crash_handler(int signo)
 {
 	sd_printf(SDOG_EMERG, "sheep exits unexpectedly (%s).",
@@ -182,6 +187,12 @@ static void crash_handler(int signo)
 
 	sd_backtrace();
 	sd_dump_variable(__sys);
+
+	/*
+	 * We have to call exit_handler() manually.
+	 * In an ordinal exiting, the function is called during exit().
+	 */
+	exit_handler();
 }
 
 static struct cluster_info __sys;
@@ -404,6 +415,7 @@ int main(int argc, char **argv)
 
 	install_crash_handler(crash_handler);
 	signal(SIGPIPE, SIG_IGN);
+	atexit(exit_handler);
 
 	long_options = build_long_options(sheep_options);
 	short_options = build_short_options(sheep_options);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 4267efd..d75f5c3 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -223,6 +223,7 @@ int init_store_driver(bool is_gateway);
 int init_global_pathnames(const char *d, char *);
 int init_base_path(const char *dir);
 int init_disk_space(const char *d);
+void unlock_base_dir(void);
 
 int fill_vdi_copy_list(void *data);
 int get_vdi_copy_number(uint32_t vid);
diff --git a/sheep/store.c b/sheep/store.c
index 76250b3..a789fc5 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -194,37 +194,42 @@ again:
 
 #define LOCK_PATH "/lock"
 
+static char *lock_path;
+static int lock_fd = -1;
+
 static int lock_base_dir(const char *d)
 {
-	char *lock_path;
 	int ret = 0;
-	int fd, len = strlen(d) + strlen(LOCK_PATH) + 1;
+	int len = strlen(d) + strlen(LOCK_PATH) + 1;
 
 	lock_path = xzalloc(len);
 	snprintf(lock_path, len, "%s" LOCK_PATH, d);
 
-	fd = open(lock_path, O_WRONLY|O_CREAT, def_fmode);
-	if (fd < 0) {
+	lock_fd = open(lock_path, O_WRONLY | O_CREAT | O_EXCL, def_fmode);
+	if (lock_fd < 0) {
 		sd_eprintf("failed to open lock file %s (%m)", lock_path);
-		ret = -1;
-		goto out;
-	}
+		sd_eprintf("other sheep would be running");
+		sd_eprintf("if no sheep is running, please remove %s manually",
+			lock_path);
 
-	if (lockf(fd, F_TLOCK, 1) < 0) {
-		if (errno == EACCES || errno == EAGAIN) {
-			sd_eprintf("another sheep daemon is using %s", d);
-		} else {
-			sd_eprintf("unable to get base dir lock (%m)");
-		}
 		ret = -1;
-		goto out;
 	}
 
-out:
-	free(lock_path);
 	return ret;
 }
 
+void unlock_base_dir(void)
+{
+	int ret;
+
+	if (!lock_path || lock_fd < 0)
+		return;
+
+	ret = unlink(lock_path);
+	if (ret < 0)
+		sd_eprintf("error at unlinking lock file: %s (%m)", lock_path);
+}
+
 int init_base_path(const char *d)
 {
 	int ret;
-- 
1.7.2.5




More information about the sheepdog mailing list