[sheepdog] [PATCH] sheep: improve durability when creating object file.

Xu Yifeng skypexu at gmail.com
Wed Mar 4 07:18:02 CET 2015


When creating a new file, make sure all data is written to disk, this is
implemented in two steps:

1. Temporary file is written and committed to disk, this includes file system
   meta data and application data.

2. The temporary file is renamed, the action should also be commited to disk
   by calling fsync() on its directory.

Signed-off-by: Xu Yifeng <skypexu at gmail.com>
---
 sheep/plain_store.c | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 5ea2946..3284e97 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -27,10 +27,11 @@ static inline bool iocb_is_aligned(const struct siocb *iocb)
 
 static int prepare_iocb(uint64_t oid, const struct siocb *iocb, bool create)
 {
-	int flags = O_DSYNC | O_RDWR;
+	int syncflag = create ? O_SYNC : O_DSYNC;
+	int flags = syncflag | O_RDWR;
 
 	if (uatomic_is_true(&sys->use_journal) || sys->nosync == true)
-		flags &= ~O_DSYNC;
+		flags &= ~syncflag;
 
 	if (sys->backend_dio && iocb_is_aligned(iocb)) {
 		if (!is_aligned_to_pagesize(iocb->buf))
@@ -391,7 +392,7 @@ int default_read(uint64_t oid, const struct siocb *iocb)
 
 int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 {
-	char path[PATH_MAX], tmp_path[PATH_MAX];
+	char path[PATH_MAX], tmp_path[PATH_MAX], *dir;
 	int flags = prepare_iocb(oid, iocb, true);
 	int ret, fd;
 	uint32_t len = iocb->length;
@@ -409,7 +410,7 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 	    != SD_RES_SUCCESS) {
 		sd_err("turn off journaling");
 		uatomic_set_false(&sys->use_journal);
-		flags |= O_DSYNC;
+		flags |= O_SYNC;
 		sync();
 	}
 
@@ -455,18 +456,43 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 		goto out;
 	}
 
+	close(fd);
+	fd = -1;
 	ret = rename(tmp_path, path);
 	if (ret < 0) {
 		sd_err("failed to rename %s to %s: %m", tmp_path, path);
 		ret = err_to_sderr(path, oid, errno);
-		goto out;
+	}
+
+out:
+	if (ret != SD_RES_SUCCESS) {
+		unlink(tmp_path);
+		if (fd != -1)
+			(void)close(fd);
+		return ret;
+	}
+
+	if (uatomic_is_true(&sys->use_journal))
+		return ret;
+
+	dir = dirname(path);
+	fd = open(dir, O_DIRECTORY|O_RDONLY);
+	if (fd < 0) {
+		sd_err("failed to open directory %s: %m", dir);
+		return err_to_sderr(path, oid, errno);
+	}
+
+	if (fsync(fd) != 0) {
+		sd_err("failed to write directory. %m");
+		ret = err_to_sderr(path, oid, errno);
+		goto out2;
 	}
 
 	ret = SD_RES_SUCCESS;
 	objlist_cache_insert(oid);
-out:
+out2:
 	if (ret != SD_RES_SUCCESS)
-		unlink(tmp_path);
+		unlink(path);
 	close(fd);
 	return ret;
 }
-- 
2.1.0




More information about the sheepdog mailing list