[Sheepdog] [PATCH v3 12/12] collie: enable cluster-wide snapshot command

Liu Yuan namei.unix at gmail.com
Fri Dec 23 15:39:30 CET 2011


From: Liu Yuan <tailai.ly at taobao.com>

Usage:
$collie cluster snapshot # snapshot the whole cluster
$collie cluster snapshot -l # list user snapshot info
$collie cluster snapshot -R epoch # restore to state of targeted snapshot

For example, the script below:
#!/bin/bash

pkill sheep
rm store/* -rf
for i in 0 1 2; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format
qemu-img create -f raw sheepdog:test 1G
qemu-io -c "write -P 0x1 0 4M" sheepdog:test
collie/collie cluster snapshot # Index 1
qemu-io -c "write -P 0x2 4M 4M" sheepdog:test
collie/collie cluster snapshot # 2
qemu-io -c "write -P 0x3 8M 4M" sheepdog:test
collie/collie cluster snapshot # 3
collie/collie cluster snapshot -l
collie/collie cluster snapshot -R 2
============================================================
OUTPUT:
Formatting 'sheepdog:test', fmt=raw size=1073741824
wrote 4194304/4194304 bytes at offset 0
4 MiB, 1 ops; 0.0000 sec (8.142 MiB/sec and 2.0354 ops/sec)
wrote 4194304/4194304 bytes at offset 4194304
4 MiB, 1 ops; 0.0000 sec (7.987 MiB/sec and 1.9968 ops/sec)
wrote 4194304/4194304 bytes at offset 8388608
4 MiB, 1 ops; 0.0000 sec (9.381 MiB/sec and 2.3452 ops/sec)
Index		Snapshot Time
1		Fri Dec 23 22:21:05 2011
2		Fri Dec 23 22:21:08 2011
3		Fri Dec 23 22:21:11 2011
Cluster restore to the snapshot 2
...

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/cluster.c         |  159 ++++++++++++++++++++++++++++++++++++++++++++++
 collie/collie.c          |    2 +
 include/sheepdog_proto.h |    1 +
 sheep/farm/farm.c        |    2 +-
 sheep/ops.c              |   17 +++--
 sheep/sheep_priv.h       |    2 -
 6 files changed, 172 insertions(+), 11 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..3149a3a 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -15,11 +15,14 @@
 #include <sys/time.h>
 
 #include "collie.h"
+#include "../sheep/farm.h"
 
 struct cluster_cmd_data {
 	int copies;
 	int nohalt;
 	int force;
+	int epoch;	/* -R argument (>= 1); non-zero selects restore */
+	int list;	/* set to 1 by -l; selects snapshot listing */
 } cluster_cmd_data;
 
 static void set_nohalt(uint16_t *p)
@@ -177,6 +180,146 @@ static int cluster_shutdown(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+/*
+ * Send SD_OP_RESTORE for snapshot @epoch to the node at sdhost:sdport.
+ * Returns EXIT_SUCCESS, EXIT_FAILURE (non-success result) or EXIT_SYSFAIL.
+ */
+static int restore_snap(int epoch)
+{
+	struct sd_obj_req req;
+	/* the result is read back through the request header's storage */
+	struct sd_obj_rsp *reply = (struct sd_obj_rsp *)&req;
+	unsigned wlen = 0, rlen = 0;
+	int sock, err;
+
+	sock = connect_to(sdhost, sdport);
+	if (sock < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&req, 0, sizeof(req));
+	req.opcode = SD_OP_RESTORE;
+	req.tgt_epoch = epoch;
+
+	err = exec_req(sock, (struct sd_req *)&req, NULL, &wlen, &rlen);
+	close(sock);
+	if (err) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (reply->result == SD_RES_SUCCESS) {
+		printf("Cluster restore to the snapshot %d\n", epoch);
+		return EXIT_SUCCESS;
+	}
+
+	fprintf(stderr, "Restore failed: %s\n", sd_strerror(reply->result));
+	return EXIT_FAILURE;
+}
+
+/* Print the index/time table for the snap_log records in @buf (@len bytes). */
+static void print_list(void *buf, unsigned len)
+{
+	struct snap_log *log_buf = (struct snap_log *)buf;
+	unsigned nr = len / sizeof (struct snap_log), i;
+
+	printf("Index\t\tSnapshot Time\n");
+	for (i = 0; i < nr; i++, log_buf++) {
+		time_t t = log_buf->time; /* copy by value, no pointer pun */
+		printf("%d\t\t%s", log_buf->epoch, ctime(&t));
+	}
+}
+
+/*
+ * Fetch the snapshot log with SD_OP_SNAP_FILE and print it.
+ * Returns EXIT_SUCCESS, EXIT_FAILURE (non-success result) or EXIT_SYSFAIL.
+ */
+static int list_snap(void)
+{
+	int fd, ret = EXIT_SYSFAIL;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen = SD_DATA_OBJ_SIZE, wlen = 0;
+	void *buf;
+
+	buf = malloc(SD_DATA_OBJ_SIZE);
+	if (!buf)
+		return EXIT_SYSFAIL;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		goto out;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_SNAP_FILE;
+	hdr.data_length = rlen;
+
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		ret = EXIT_SYSFAIL;	/* don't leak exec_req()'s raw code */
+		goto out;
+	}
+
+	if (rsp->result == SD_RES_SUCCESS) {
+		print_list(buf, rlen);
+		ret = EXIT_SUCCESS;
+	} else {
+		fprintf(stderr, "Listing snapshots failed: %s\n",
+				sd_strerror(rsp->result));
+		ret = EXIT_FAILURE;
+	}
+out:
+	free(buf);
+	return ret;
+}
+
+/*
+ * Ask the node at sdhost:sdport to take a snapshot (SD_OP_SNAPSHOT).
+ * Returns EXIT_SUCCESS, EXIT_FAILURE (non-success result) or EXIT_SYSFAIL.
+ */
+static int do_snapshot(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen = 0, wlen = 0;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_SNAPSHOT;
+
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Snapshot failed: %s\n",
+				sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/* "cluster snapshot": -R <epoch> restores, -l lists, else take a snapshot. */
+static int cluster_snapshot(int argc, char **argv)
+{
+	if (cluster_cmd_data.epoch)
+		return restore_snap(cluster_cmd_data.epoch);
+
+	if (cluster_cmd_data.list)
+		return list_snap();
+
+	return do_snapshot();
+}
+
 #define RECOVER_PRINT \
 "Caution! Please try starting all the cluster nodes normally before\n\
 running this command.\n\n\
@@ -243,6 +386,8 @@ static struct subcommand cluster_cmd[] = {
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
 	{"recover", NULL, "afph", "manually recover the cluster",
 	0, cluster_recover},
+	{"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
+	0, cluster_snapshot},
 	{NULL,},
 };
 
@@ -270,6 +415,20 @@ static int cluster_parser(int ch, char *opt)
 	case 'f':
 		cluster_cmd_data.force = 1;
 		break;
+	case 'R':
+		cluster_cmd_data.epoch = strtol(opt, &p, 10);
+		if (opt == p) {
+			fprintf(stderr, "The epoch must be an integer\n");
+			exit(EXIT_FAILURE);
+		}
+		if (cluster_cmd_data.epoch < 1) {
+			fprintf(stderr, "The epoch must be greater than 0\n");
+			exit(EXIT_FAILURE);
+		}
+		break;
+	case 'l':
+		cluster_cmd_data.list = 1;
+		break;
 	}
 
 	return 0;
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..b3eb0b1 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -44,6 +44,8 @@ static const struct sd_option collie_options[] = {
 	{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
                           nodes for the configured redundancy"},
 	{'f', "force", 0, "do not prompt for confirmation"},
+	{'R', "restore", 1, "restore the cluster"},
+	{'l', "list", 0, "list the user epoch information"},
 
 	{ 0, NULL, 0, NULL },
 };
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..8789095 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -22,6 +22,7 @@
 #define SD_OP_CREATE_AND_WRITE_OBJ  0x01
 #define SD_OP_READ_OBJ       0x02
 #define SD_OP_WRITE_OBJ      0x03
+#define SD_OP_REMOVE_OBJ     0x04
 
 #define SD_OP_NEW_VDI        0x11
 #define SD_OP_LOCK_VDI       0x12
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index c71eaf2..db7f070 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -528,10 +528,10 @@ static int farm_get_snap_file(struct siocb *iocb)
 	size_t size;
 	int nr;
 
-	dprintf("try get snap file\n");
 	buffer = snap_log_read(&nr, 1);
 	if (!buffer)
 		goto out;
+	dprintf("get snap file, nr %d\n", nr);
 	size = nr * sizeof(struct snap_log);
 	memcpy(iocb->buf, buffer, size);
 	iocb->length = size;
diff --git a/sheep/ops.c b/sheep/ops.c
index 5300039..46d2445 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -400,9 +400,9 @@ static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
 static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
-	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
+	const struct sd_obj_req *hdr = (const struct sd_obj_req *)req;
 	int ret = SD_RES_SUCCESS;
-	struct siocb iocb = { .epoch = hdr->epoch };
+	struct siocb iocb = { .epoch = hdr->tgt_epoch };
 
 	if (store.restore)
 		ret = store.restore(&iocb);
@@ -410,7 +410,7 @@ static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
-static int cluster_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
+static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
 			    void *data)
 {
 	int ret = SD_RES_SUCCESS;
@@ -485,11 +485,6 @@ static struct sd_op_template sd_ops[] = {
 		.force = 1,
 		.process_main = cluster_restore,
 	},
-	[SD_OP_SNAP_FILE] = {
-		.type = SD_OP_TYPE_CLUSTER,
-		.force = 1,
-		.process_main = cluster_get_snap_file,
-	},
 
 	/* local operations */
 	[SD_OP_READ_VDIS] = {
@@ -531,6 +526,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_work = local_get_epoch,
 	},
 
+	[SD_OP_SNAP_FILE] = {
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_work = local_get_snap_file,
+	},
+
 	/* I/O operations */
 	[SD_OP_CREATE_AND_WRITE_OBJ] = {
 		.type = SD_OP_TYPE_IO,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 4110881..04cd4df 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -21,8 +21,6 @@
 #include "sheep.h"
 #include "cluster.h"
 
-#define SD_OP_REMOVE_OBJ     0x91
-
 #define SD_OP_GET_OBJ_LIST   0xA1
 #define SD_OP_GET_EPOCH      0XA2
 
-- 
1.7.8.rc3




More information about the sheepdog mailing list