[Sheepdog] [PATCH v5 14/17] sheep: add cluster snapshot/restore support

Fri Dec 30 14:07:09 CET 2011

From: Liu Yuan <tailai.ly at taobao.com>

This kind of snapshot is supposed to be triggered by the user, _not_ by the
recovery code. I don't think we need to restore to the state at the beginning
of the recovery. So this work only permits us to restore the cluster to a
snapshot initiated by end users, though it would be quite easy to implement
restoring to the snapshots forcibly taken by the recovery path.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 include/sheep.h          |    4 ++
 include/sheepdog_proto.h |    1 +
 sheep/farm/farm.c        |  134 ++++++++++++++++++++++++++++++++++++++++++++++
 sheep/ops.c              |   61 +++++++++++++++++++++
 sheep/sheep_priv.h       |    4 ++
 5 files changed, 204 insertions(+), 0 deletions(-)

diff --git a/include/sheep.h b/include/sheep.h
index 6cd63e7..f634981 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -39,6 +39,9 @@
 #define SD_OP_GET_VDI_ATTR   0x89
 #define SD_OP_RECOVER        0x8a
 #define SD_OP_GET_STORE_LIST 0x90
+#define SD_OP_SNAPSHOT       0x91
+#define SD_OP_RESTORE        0x92
+#define SD_OP_GET_SNAP_FILE  0x93
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
@@ -266,6 +269,7 @@ static inline const char *sd_strerror(int err)
 		{SD_RES_HALT, "IO has halted as there are too few living nodes"},
 		{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
 		{SD_RES_NO_STORE, "Targeted backend store is not found"},
+		{SD_RES_NO_SUPPORT, "Operation is not supported"},
 
 		{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
 		{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 289502f..d3defeb 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -60,6 +60,7 @@
 #define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
 #define SD_RES_MANUAL_RECOVER   0x1A /* Users should not manually recover this cluster */
 #define SD_RES_NO_STORE         0x20 /* No targeted backend store */
+#define SD_RES_NO_SUPPORT       0x21 /* Operation is not supported by backend store */
 
 /*
  * Object ID rules
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index d7a0893..c0fd65f 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -427,6 +427,137 @@ out:
 	return ret;
 }
 
+/* Take a new user snapshot: snap_file_write() fills sha1 for the next user
+ * epoch and snap_log_write() is then given it. Any failure is SD_RES_EIO. */
+static int farm_snapshot(struct siocb *iocb)
+{
+	unsigned char sha1[SHA1_LEN];
+	int nr_logs, user_epoch, ret = SD_RES_EIO;
+	void *log = snap_log_read(&nr_logs, 1);
+
+	if (!log)
+		return ret;
+
+	/* The next epoch is one past the entry count the log reports. */
+	user_epoch = nr_logs + 1;
+	dprintf("user epoch %d\n", user_epoch);
+
+	/* && short-circuits, so the log is only written after the file. */
+	if (snap_file_write(user_epoch, sha1, 1) >= 0 &&
+	    snap_log_write(user_epoch, sha1, 1) >= 0)
+		ret = SD_RES_SUCCESS;
+
+	free(log);
+	return ret;
+}
+
+/* Unlink every non-dot entry under obj_path; -1 only if opendir() fails. */
+static int cleanup_working_dir(void)
+{
+	char path[PATH_MAX];
+	struct dirent *ent;
+	DIR *dp;
+
+	dprintf("try clean up working dir\n");
+	dp = opendir(obj_path);
+	if (!dp)
+		return -1;
+	for (ent = readdir(dp); ent; ent = readdir(dp)) {
+		if (ent->d_name[0] == '.')
+			continue;
+		snprintf(path, sizeof(path), "%s%s", obj_path, ent->d_name);
+		/* A failed unlink is only logged; the sweep keeps going. */
+		if (unlink(path) < 0)
+			eprintf("%s:%m\n", path);
+		else
+			dprintf("remove file %s\n", ent->d_name);
+	}
+	closedir(dp);
+	return 0;
+}
+
+/* Put back every object in the trunk of user snapshot @epoch. Returns
+ * SD_RES_SUCCESS only if each is read and stored, else the failing code. */
+static int restore_objects_from_snap(int epoch)
+{
+	struct sha1_file_hdr hdr;
+	struct trunk_entry *trunk_buf, *trunk_free = NULL;
+	unsigned char trunk_sha1[SHA1_LEN];
+	uint64_t nr_trunks, i;
+	int ret = SD_RES_EIO;
+
+	if (get_trunk_sha1(epoch, trunk_sha1, 1) < 0)
+		goto out;
+
+	trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
+	if (!trunk_buf)
+		goto out;
+	nr_trunks = hdr.priv;
+	for (i = 0; i < nr_trunks; i++, trunk_buf++) {
+		struct sha1_file_hdr h;
+		struct siocb io = { 0 };
+		uint64_t oid = trunk_buf->oid;
+		void *buffer = sha1_file_read(trunk_buf->sha1, &h);
+
+		if (!buffer) {
+			eprintf("oid %"PRIx64" not restored\n", oid);
+			/* ret may still hold the previous object's success */
+			ret = SD_RES_EIO;
+			goto out;
+		}
+		io.length = h.size;
+		io.buf = buffer;
+		ret = farm_atomic_put(oid, &io);
+		free(buffer);	/* released on both paths; no leak on failure */
+		if (ret != SD_RES_SUCCESS) {
+			eprintf("oid %"PRIx64" not restored\n", oid);
+			goto out;
+		}
+		dprintf("oid %"PRIx64" restored\n", oid);
+	}
+out:
+	free(trunk_free);
+	return ret;
+}
+
+/*
+ * Restore the store to the user snapshot named by iocb->epoch: clean up
+ * the working directory, then put the snapshot's objects back.
+ */
+static int farm_restore(struct siocb *iocb)
+{
+	int epoch = iocb->epoch;
+
+	dprintf("try recover user epoch %d\n", epoch);
+
+	if (cleanup_working_dir() < 0) {
+		eprintf("failed to clean up the working dir %m\n");
+		return SD_RES_EIO;
+	}
+
+	return restore_objects_from_snap(epoch);
+}
+
+/* Copy the user snapshot log into iocb->buf; its byte size goes in ->length.
+ * NOTE(review): the copy is unbounded here -- confirm iocb->buf is big enough. */
+static int farm_get_snap_file(struct siocb *iocb)
+{
+	size_t bytes;
+	int nr_logs;
+	void *log;
+
+	dprintf("try get snap file\n");
+	log = snap_log_read(&nr_logs, 1);
+	if (!log)
+		return SD_RES_EIO;
+
+	bytes = nr_logs * sizeof(struct snap_log);
+	memcpy(iocb->buf, log, bytes);
+	iocb->length = bytes;
+	free(log);
+	return SD_RES_SUCCESS;
+}
+
 struct store_driver farm = {
 	.name = "farm",
 	.init = farm_init,
@@ -439,6 +570,9 @@ struct store_driver farm = {
 	.atomic_put = farm_atomic_put,
 	.begin_recover = farm_begin_recover,
 	.end_recover = farm_end_recover,
+	.snapshot = farm_snapshot,
+	.restore = farm_restore,
+	.get_snap_file = farm_get_snap_file,
 };
 
 add_store_driver(farm);
diff --git a/sheep/ops.c b/sheep/ops.c
index 478d41a..5270ee4 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -407,6 +407,49 @@ out:
 	return ret;
 }
 
+/* Cluster op: take a snapshot via the backend store's .snapshot hook.
+ * req, rsp and data are not used; the hook receives a zeroed siocb. */
+static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
+			    void *data)
+{
+	struct siocb iocb = { 0 };
+
+	/* A store driver without the hook cannot serve this request. */
+	if (!sd_store->snapshot)
+		return SD_RES_NO_SUPPORT;
+
+	return sd_store->snapshot(&iocb);
+}
+
+/* Cluster op: restore the backend store to the request's tgt_epoch. */
+static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
+			   void *data)
+{
+	const struct sd_obj_req *obj_req = (const struct sd_obj_req *)req;
+	struct siocb iocb = { 0 };
+
+	if (!sd_store->restore)
+		return SD_RES_NO_SUPPORT;
+
+	iocb.epoch = obj_req->tgt_epoch;
+	return sd_store->restore(&iocb);
+}
+
+/* Local op: have the store fill data, then report the length in rsp. */
+static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
+			    void *data)
+{
+	struct siocb iocb = { .buf = data };
+	int ret;
+
+	if (!sd_store->get_snap_file)
+		return SD_RES_NO_SUPPORT;
+	ret = sd_store->get_snap_file(&iocb);
+	/* Set even when the hook fails, to whatever it left in iocb.length. */
+	rsp->data_length = iocb.length;
+	return ret;
+}
+
 static struct sd_op_template sd_ops[] = {
 
 	/* cluster operations */
@@ -457,6 +500,18 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = cluster_manual_recover,
 	},
 
+	[SD_OP_SNAPSHOT] = {
+		.type = SD_OP_TYPE_CLUSTER,
+		.force = 1,
+		.process_main = cluster_snapshot,
+	},
+
+	[SD_OP_RESTORE] = {
+		.type = SD_OP_TYPE_CLUSTER,
+		.force = 1,
+		.process_main = cluster_restore,
+	},
+
 	/* local operations */
 	[SD_OP_GET_STORE_LIST] = {
 		.type = SD_OP_TYPE_LOCAL,
@@ -503,6 +558,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_work = local_get_epoch,
 	},
 
+	[SD_OP_GET_SNAP_FILE] = {
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_work = local_get_snap_file,
+	},
+
 	/* I/O operations */
 	[SD_OP_CREATE_AND_WRITE_OBJ] = {
 		.type = SD_OP_TYPE_IO,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 825e2f9..6711555 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -179,6 +179,10 @@ struct store_driver {
 	int (*atomic_put)(uint64_t oid, struct siocb *);
 	int (*begin_recover)(struct siocb *);
 	int (*end_recover)(struct siocb *);
+	/* Operations for snapshot */
+	int (*snapshot)(struct siocb *);
+	int (*restore)(struct siocb *);
+	int (*get_snap_file)(struct siocb *);
 };
 
 extern struct list_head store_drivers;
-- 
1.7.8.rc3