[Sheepdog] [PATCH] sheep: abstract out 'all or nothing' write operation

Liu Yuan namei.unix at gmail.com
Wed Dec 21 09:48:26 CET 2011


From: Liu Yuan <tailai.ly at taobao.com>

In recovery, sheep needs to atomically write the object that is being
recovered to the store. This operation assumes the underlying store layout,
so we need to abstract it out.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/sheep_priv.h   |    1 +
 sheep/simple_store.c |   41 +++++++++++++++++++++++++++++++++++++++++
 sheep/store.c        |   36 ++++++------------------------------
 3 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 29ffd74..8a39d1b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -175,6 +175,7 @@ struct store_driver {
 	/* Operations in recovery */
 	int (*get_objlist)(struct siocb *);
 	int (*link)(uint64_t oid, struct siocb *, int tgt_epoch);
+	int (*atomic_put)(uint64_t oid, struct siocb *);
 };
 
 extern void register_store_driver(struct store_driver *);
diff --git a/sheep/simple_store.c b/sheep/simple_store.c
index 36c45b3..ec77a8d 100644
--- a/sheep/simple_store.c
+++ b/sheep/simple_store.c
@@ -198,6 +198,46 @@ static int simple_store_link(uint64_t oid, struct siocb *iocb, int tgt_epoch)
        return SD_RES_EIO;
 }
 
+static int simple_store_atomic_put(uint64_t oid, struct siocb *iocb)
+{
+	char path[PATH_MAX], tmp_path[PATH_MAX];
+	int flags = O_DSYNC | O_RDWR | O_CREAT;
+	int ret = SD_RES_EIO, epoch = iocb->epoch, fd;
+	uint32_t len = iocb->length;
+
+	snprintf(path, sizeof(path), "%s%08u/%016" PRIx64, obj_path,
+		 epoch, oid);
+	snprintf(tmp_path, sizeof(tmp_path), "%s%08u/%016" PRIx64 ".tmp",
+		 obj_path, epoch, oid);
+
+	fd = open(tmp_path, flags, def_fmode);
+	if (fd < 0) {
+		eprintf("failed to open %s: %m\n", tmp_path);
+		goto out;
+	}
+
+	ret = write(fd, iocb->buf, len);
+	if (ret != len) {
+		eprintf("failed to write object. %m\n");
+		ret = SD_RES_EIO;
+		goto out_close;
+	}
+
+
+	ret = rename(tmp_path, path);
+	if (ret < 0) {
+		eprintf("failed to rename %s to %s: %m\n", tmp_path, path);
+		ret = SD_RES_EIO;
+		goto out_close;
+	}
+	dprintf("%"PRIx64"\n", oid);
+	ret = SD_RES_SUCCESS;
+out_close:
+	close(fd);
+out:
+	return ret;
+}
+
 struct store_driver store = {
 	.driver_name = "simple",
 	.init = simple_store_init,
@@ -207,6 +247,7 @@ struct store_driver store = {
 	.close = simple_store_close,
 	.get_objlist = simple_store_get_objlist,
 	.link = simple_store_link,
+	.atomic_put = simple_store_atomic_put,
 };
 
 void register_store_driver(struct store_driver *driver)
diff --git a/sheep/store.c b/sheep/store.c
index 4adae51..8c857f9 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1186,6 +1186,7 @@ static int recover_object_from_replica(uint64_t oid,
 	unsigned wlen = 0, rlen;
 	int fd, ret;
 	void *buf;
+	struct siocb iocb = { 0 };
 
 	buf = alloc_buffer_for(oid);
 	if (!buf) {
@@ -1194,8 +1195,6 @@ static int recover_object_from_replica(uint64_t oid,
 	}
 
 	if (is_myself(entry->addr, entry->port)) {
-		struct siocb iocb = { 0 };
-
 		iocb.epoch = epoch;
 		ret = store.link(oid, &iocb, tgt_epoch);
 		if (ret == SD_RES_SUCCESS) {
@@ -1242,34 +1241,11 @@ static int recover_object_from_replica(uint64_t oid,
 	rsp = (struct sd_obj_rsp *)&hdr;
 
 	if (rsp->result == SD_RES_SUCCESS) {
-		char path[PATH_MAX], tmp_path[PATH_MAX];
-		int flags = O_DSYNC | O_RDWR | O_CREAT;
-
-		snprintf(path, sizeof(path), "%s%08u/%016" PRIx64, obj_path,
-				epoch, oid);
-		snprintf(tmp_path, sizeof(tmp_path), "%s%08u/%016" PRIx64 ".tmp",
-				obj_path, epoch, oid);
-
-		fd = open(tmp_path, flags, def_fmode);
-		if (fd < 0) {
-			eprintf("failed to open %s: %m\n", tmp_path);
-			ret = -1;
-			goto out;
-		}
-
-		ret = write(fd, buf, rlen);
-		if (ret != rlen) {
-			eprintf("failed to write object\n");
-			ret = -1;
-			goto out;
-		}
-
-		close(fd);
-
-		dprintf("rename %s to %s\n", tmp_path, path);
-		ret = rename(tmp_path, path);
-		if (ret < 0) {
-			eprintf("failed to rename %s to %s: %m\n", tmp_path, path);
+		iocb.epoch = epoch;
+		iocb.length = rlen;
+		iocb.buf = buf;
+		ret = store.atomic_put(oid, &iocb);
+		if (ret!= SD_RES_SUCCESS) {
 			ret = -1;
 			goto out;
 		}
-- 
1.7.8.rc3




More information about the sheepdog mailing list