From: Liu Yuan <tailai.ly at taobao.com>

Usage:
  $collie cluster snapshot           # snapshot the whole cluster
  $collie cluster snapshot -l        # list user snapshot info
  $collie cluster snapshot -R epoch  # restore to state of targeted snapshot

For example, with the script below:

#!/bin/bash

pkill sheep
rm store/* -rf
for i in 0 1 2; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format -b farm
qemu-img create -f raw sheepdog:test 1G
qemu-io -c "write -P 0x1 0 4M" sheepdog:test
collie/collie cluster snapshot # Index 1
qemu-io -c "write -P 0x2 4M 4M" sheepdog:test
collie/collie cluster snapshot # 2
qemu-io -c "write -P 0x3 8M 4M" sheepdog:test
collie/collie cluster snapshot # 3
collie/collie cluster snapshot -l
collie/collie cluster snapshot -R 2
============================================================
OUTPUT:

Formatting 'sheepdog:test', fmt=raw size=1073741824
wrote 4194304/4194304 bytes at offset 0
4 MiB, 1 ops; 0.0000 sec (8.142 MiB/sec and 2.0354 ops/sec)
wrote 4194304/4194304 bytes at offset 4194304
4 MiB, 1 ops; 0.0000 sec (7.987 MiB/sec and 1.9968 ops/sec)
wrote 4194304/4194304 bytes at offset 8388608
4 MiB, 1 ops; 0.0000 sec (9.381 MiB/sec and 2.3452 ops/sec)
Index		Snapshot Time
1		Fri Dec 23 22:21:05 2011
2		Fri Dec 23 22:21:08 2011
3		Fri Dec 23 22:21:11 2011
Cluster restore to the snapshot 2
...
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
Review notes (this section is ignored by git-am):
 - list_snap() ended with an unconditional "return EXIT_SUCCESS", so
   "collie cluster snapshot -l" exited 0 even when connecting, sending the
   request, or the server-side operation failed. It now returns ret, with
   ret set to EXIT_SYSFAIL when exec_req() fails and to EXIT_SUCCESS only
   after the list has been printed.
 - Added short comments above the new functions.
 - Hunk lengths, later hunk offsets and the diffstat were adjusted by hand
   for the 13 added lines; the "index" blob ids were NOT regenerated.
 - Line breaks and tab indentation were reconstructed from a
   whitespace-collapsed copy of this mail; check the context lines against
   the tree before applying.

 collie/cluster.c         |  170 ++++++++++++++++++++++++++++++++++++++++++++++
 collie/collie.c          |    2 +
 include/sheepdog_proto.h |    1 +
 sheep/sheep_priv.h       |    2 -
 4 files changed, 173 insertions(+), 2 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index 6b9cd4b..7e81bd8 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -17,6 +17,8 @@
 #include "collie.h"
 
 struct cluster_cmd_data {
+	int epoch;
+	int list;
 	int copies;
 	int nohalt;
 	int force;
@@ -226,6 +228,158 @@ static int cluster_shutdown(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+/* Send SD_OP_RESTORE for @epoch and report the outcome. */
+static int restore_snap(int epoch)
+{
+	int fd, ret;
+	struct sd_obj_req hdr;
+	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
+	unsigned rlen, wlen;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.opcode = SD_OP_RESTORE;
+	hdr.tgt_epoch = epoch;
+
+	rlen = 0;
+	wlen = 0;
+	ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Restore failed: %s\n",
+			sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	printf("Cluster restore to the snapshot %d\n", epoch);
+	return EXIT_SUCCESS;
+}
+
+/* Print one "index, time" row per struct snap_log in @len bytes of @buf. */
+static void print_list(void *buf, unsigned len)
+{
+	struct snap_log *log_buf = (struct snap_log *)buf;
+	unsigned nr = len / sizeof (struct snap_log), i;
+
+	printf("Index\t\tSnapshot Time\n");
+	for (i = 0; i < nr; i++, log_buf++) {
+		time_t *t = (time_t *)&log_buf->time;
+		printf("%d\t\t", log_buf->epoch);
+		printf("%s", ctime(t));
+	}
+}
+
+/*
+ * Fetch the snapshot log with SD_OP_GET_SNAP_FILE and print it.
+ *
+ * Returns EXIT_SUCCESS once the list has been printed, EXIT_SYSFAIL if
+ * allocating the buffer, connecting or executing the request fails, and
+ * EXIT_FAILURE if the response result is not SD_RES_SUCCESS.
+ */
+static int list_snap(void)
+{
+	int fd, ret = EXIT_SYSFAIL;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+	void *buf;
+
+	buf = malloc(SD_DATA_OBJ_SIZE);
+	if (!buf)
+		return EXIT_SYSFAIL;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		goto out;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	wlen = 0;
+	rlen = SD_DATA_OBJ_SIZE;
+	hdr.opcode = SD_OP_GET_SNAP_FILE;
+	hdr.data_length = rlen;
+
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		ret = EXIT_SYSFAIL;
+		goto out;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Listing snapshots failed: %s\n",
+			sd_strerror(rsp->result));
+		ret = EXIT_FAILURE;
+		goto out;
+	}
+
+	print_list(buf, rlen);
+	ret = EXIT_SUCCESS;
+out:
+	free(buf);
+	return ret;
+}
+
+/* Send SD_OP_SNAPSHOT to the cluster and report the outcome. */
+static int do_snapshot(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.opcode = SD_OP_SNAPSHOT;
+
+	rlen = 0;
+	wlen = 0;
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Snapshot failed: %s\n",
+			sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/* Restore if -R gave an epoch, list if -l was set, else take a snapshot. */
+static int cluster_snapshot(int argc, char **argv)
+{
+	int ret, epoch = cluster_cmd_data.epoch;
+	if (epoch)
+		ret = restore_snap(epoch);
+	else if (cluster_cmd_data.list)
+		ret = list_snap();
+	else
+		ret = do_snapshot();
+	return ret;
+}
+
 #define RECOVER_PRINT \
 "Caution! Please try starting all the cluster nodes normally before\n\
 running this command.\n\n\
@@ -292,6 +446,8 @@ static struct subcommand cluster_cmd[] = {
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
 	{"recover", NULL, "afph", "manually recover the cluster",
 	 0, cluster_recover},
+	{"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
+	 0, cluster_snapshot},
 	{NULL,},
 };
 
@@ -322,6 +478,20 @@ static int cluster_parser(int ch, char *opt)
 	case 'f':
 		cluster_cmd_data.force = 1;
 		break;
+	case 'R':
+		cluster_cmd_data.epoch = strtol(opt, &p, 10);
+		if (opt == p) {
+			fprintf(stderr, "The epoch must be an integer\n");
+			exit(EXIT_FAILURE);
+		}
+		if (cluster_cmd_data.epoch < 1) {
+			fprintf(stderr, "The epoch must be greater than 0\n");
+			exit(EXIT_FAILURE);
+		}
+		break;
+	case 'l':
+		cluster_cmd_data.list = 1;
+		break;
 	}
 
 	return 0;
diff --git a/collie/collie.c b/collie/collie.c
index baf7c67..7989bb2 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -45,6 +45,8 @@ static const struct sd_option collie_options[] = {
 	{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
 nodes for the configured redundancy"},
 	{'f', "force", 0, "do not prompt for confirmation"},
+	{'R', "restore", 1, "restore the cluster"},
+	{'l', "list", 0, "list the user epoch information"},
 	{ 0, NULL, 0, NULL },
 };
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 429f5cd..2d0d5ec 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -22,6 +22,7 @@
 #define SD_OP_CREATE_AND_WRITE_OBJ 0x01
 #define SD_OP_READ_OBJ 0x02
 #define SD_OP_WRITE_OBJ 0x03
+#define SD_OP_REMOVE_OBJ 0x04
 
 #define SD_OP_NEW_VDI 0x11
 #define SD_OP_LOCK_VDI 0x12
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 45f44e0..df641b3 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -21,8 +21,6 @@
 #include "sheep.h"
 #include "cluster.h"
 
-#define SD_OP_REMOVE_OBJ 0x91
-
 #define SD_OP_GET_OBJ_LIST 0xA1
 #define SD_OP_GET_EPOCH 0XA2
 
-- 
1.7.8.2