[sheepdog] [PATCH v2 4/9] sheep: handle node failure during atomic_create_and_write

MORITA Kazutaka morita.kazutaka at gmail.com
Fri Jul 12 22:25:54 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

If sheep fails after creating a temporary file and before renaming it
in atomic_create_and_write(), sheep cannot create the file any more
since no one cleans up the temporary file.  This patch adds a
force option to atomic_create_and_write() and cleans up the temporary
file when it is enabled.

It is okay for write_config() and update_epoch_log() to set the force
option because these functions cannot be called from multiple threads
at the same time.  However, md_move_object() can be called from
multiple threads, so this patch keeps the function's behavior as it
was.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 include/util.h |    3 ++-
 lib/util.c     |   22 +++++++++++++++++-----
 sheep/config.c |    2 +-
 sheep/md.c     |    2 +-
 sheep/store.c  |    2 +-
 5 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/include/util.h b/include/util.h
index 6dca655..4611cae 100644
--- a/include/util.h
+++ b/include/util.h
@@ -97,7 +97,8 @@ bool is_xattr_enabled(const char *path);
 void trim_zero_blocks(void *buf, uint64_t *offset, uint32_t *len);
 void untrim_zero_blocks(void *buf, uint64_t offset, uint32_t len,
 			uint32_t requested_len);
-int atomic_create_and_write(const char *path, char *buf, size_t len);
+int atomic_create_and_write(const char *path, char *buf, size_t len,
+			    bool force_create);
 
 /* a type safe version of qsort() */
 #define xqsort(base, nmemb, compar)					\
diff --git a/lib/util.c b/lib/util.c
index 6fd3817..417328b 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -503,18 +503,30 @@ bool is_xattr_enabled(const char *path)
 	return !(ret == -1 && errno == ENOTSUP);
 }
 
-int atomic_create_and_write(const char *path, char *buf, size_t len)
+/*
+ * If force_create is true, this function creates the file even when the
+ * temporary file exists.
+ */
+int atomic_create_and_write(const char *path, char *buf, size_t len,
+			    bool force_create)
 {
 	int fd, ret;
 	char tmp_path[PATH_MAX];
 
 	snprintf(tmp_path, PATH_MAX, "%s.tmp", path);
-
+again:
 	fd = open(tmp_path, O_WRONLY | O_CREAT | O_SYNC | O_EXCL, sd_def_fmode);
 	if (fd < 0) {
-		if (errno == EEXIST)
-			sd_dprintf("someone else is dealing with %s", tmp_path);
-		else
+		if (errno == EEXIST) {
+			if (force_create) {
+				sd_dprintf("clean up a temporary file %s",
+					   tmp_path);
+				unlink(tmp_path);
+				goto again;
+			} else
+				sd_dprintf("someone else is dealing with %s",
+					   tmp_path);
+		} else
 			sd_eprintf("failed to open temporal file %s, %m",
 				   tmp_path);
 		ret = -1;
diff --git a/sheep/config.c b/sheep/config.c
index 787dee1..384e711 100644
--- a/sheep/config.c
+++ b/sheep/config.c
@@ -33,7 +33,7 @@ static int write_config(void)
 	int ret;
 
 	ret = atomic_create_and_write(config_path, (char *)&config,
-				sizeof(config));
+				      sizeof(config), true);
 	if (ret < 0) {
 		sd_eprintf("atomic_create_and_write() failed");
 		return SD_RES_EIO;
diff --git a/sheep/md.c b/sheep/md.c
index 5ee3339..e307cf5 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -510,7 +510,7 @@ static int md_move_object(uint64_t oid, char *old, char *new)
 		goto out_close;
 	}
 
-	if (atomic_create_and_write(new, buf.buf, buf.len) < 0) {
+	if (atomic_create_and_write(new, buf.buf, buf.len, false) < 0) {
 		sd_eprintf("failed to create %s", new);
 		ret = -1;
 		goto out_close;
diff --git a/sheep/store.c b/sheep/store.c
index d07a100..177cbcb 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -35,7 +35,7 @@ int update_epoch_log(uint32_t epoch, struct sd_node *nodes, size_t nr_nodes)
 
 	snprintf(path, sizeof(path), "%s%08u", epoch_path, epoch);
 
-	ret = atomic_create_and_write(path, buf, len);
+	ret = atomic_create_and_write(path, buf, len, true);
 
 	free(buf);
 	return ret;
-- 
1.7.9.5




More information about the sheepdog mailing list