From: Liu Yuan <tailai.ly at taobao.com>

Usage:
  $collie cluster snapshot           # snapshot the whole cluster
  $collie cluster snapshot -l        # list user snapshot info
  $collie cluster snapshot -R epoch  # restore to the state of the targeted snapshot

For example, the script below:

#!/bin/bash

pkill sheep
rm store/* -rf
for i in 0 1 2; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format
qemu-img create -f raw sheepdog:test 1G
qemu-io -c "write -P 0x1 0 4M" sheepdog:test
collie/collie cluster snapshot # Index 1
qemu-io -c "write -P 0x2 4M 4M" sheepdog:test
collie/collie cluster snapshot # 2
qemu-io -c "write -P 0x3 8M 4M" sheepdog:test
collie/collie cluster snapshot # 3
collie/collie cluster snapshot -l
collie/collie cluster snapshot -R 2
============================================================
OUTPUT:

Formatting 'sheepdog:test', fmt=raw size=1073741824
wrote 4194304/4194304 bytes at offset 0
4 MiB, 1 ops; 0.0000 sec (8.142 MiB/sec and 2.0354 ops/sec)
wrote 4194304/4194304 bytes at offset 4194304
4 MiB, 1 ops; 0.0000 sec (7.987 MiB/sec and 1.9968 ops/sec)
wrote 4194304/4194304 bytes at offset 8388608
4 MiB, 1 ops; 0.0000 sec (9.381 MiB/sec and 2.3452 ops/sec)
Index		Snapshot Time
1		Fri Dec 23 22:21:05 2011
2		Fri Dec 23 22:21:08 2011
3		Fri Dec 23 22:21:11 2011
Cluster restore to the snapshot 2 ...

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/cluster.c         |  166 ++++++++++++++++++++++++++++++++++++++++++++++
 collie/collie.c          |    2 +
 include/sheepdog_proto.h |    1 +
 sheep/farm/farm.c        |    2 +-
 sheep/ops.c              |   17 +++--
 sheep/sheep_priv.h       |    2 -
 6 files changed, 179 insertions(+), 11 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..3149a3a 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -15,11 +15,14 @@
 #include <sys/time.h>
 
 #include "collie.h"
+#include "../sheep/farm.h"
 
 struct cluster_cmd_data {
 	int copies;
 	int nohalt;
 	int force;
+	int epoch;
+	int list;
 } cluster_cmd_data;
 
 static void set_nohalt(uint16_t *p)
@@ -177,6 +180,153 @@ static int cluster_shutdown(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+/* Send SD_OP_RESTORE for user snapshot 'epoch'; returns an EXIT_* status. */
+static int restore_snap(int epoch)
+{
+	int fd, ret;
+	struct sd_obj_req hdr;
+	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
+	unsigned rlen, wlen;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.opcode = SD_OP_RESTORE;
+	hdr.tgt_epoch = epoch;
+
+	rlen = 0;
+	wlen = 0;
+	ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Restore failed: %s\n",
+				sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	printf("Cluster restore to the snapshot %d\n", epoch);
+	return EXIT_SUCCESS;
+}
+
+/* Print 'len' bytes of struct snap_log records as an index/time table. */
+static void print_list(void *buf, unsigned len)
+{
+	struct snap_log *log_buf = (struct snap_log *)buf;
+	unsigned nr = len / sizeof (struct snap_log), i;
+
+	printf("Index\t\tSnapshot Time\n");
+	for (i = 0; i < nr; i++, log_buf++) {
+		/* copy into a real time_t instead of aliasing the log field */
+		time_t t = log_buf->time;
+		printf("%d\t\t", log_buf->epoch);
+		printf("%s", ctime(&t));
+	}
+}
+
+/* Fetch the snapshot log with SD_OP_SNAP_FILE and print it. */
+static int list_snap(void)
+{
+	int fd, ret = EXIT_SYSFAIL;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+	void *buf;
+
+	buf = malloc(SD_DATA_OBJ_SIZE);
+	if (!buf)
+		return EXIT_SYSFAIL;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		goto out;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	wlen = 0;
+	rlen = SD_DATA_OBJ_SIZE;
+	hdr.opcode = SD_OP_SNAP_FILE;
+	hdr.data_length = rlen;
+
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		ret = EXIT_SYSFAIL;
+		goto out;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Listing snapshots failed: %s\n",
+				sd_strerror(rsp->result));
+		ret = EXIT_FAILURE;
+		goto out;
+	}
+
+	print_list(buf, rlen);
+	ret = EXIT_SUCCESS;
+out:
+	free(buf);
+	return ret;
+}
+
+/* Send SD_OP_SNAPSHOT to take a snapshot; returns an EXIT_* status. */
+static int do_snapshot(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.opcode = SD_OP_SNAPSHOT;
+
+	rlen = 0;
+	wlen = 0;
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Snapshot failed: %s\n",
+				sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/* 'cluster snapshot' entry: -R restores, -l lists, otherwise snapshots. */
+static int cluster_snapshot(int argc, char **argv)
+{
+	int ret, epoch = cluster_cmd_data.epoch;
+	if (epoch)
+		ret = restore_snap(epoch);
+	else if (cluster_cmd_data.list)
+		ret = list_snap();
+	else
+		ret = do_snapshot();
+	return ret;
+}
+
 #define RECOVER_PRINT				\
 "Caution! Please try starting all the cluster nodes normally before\n\
 running this command.\n\n\
@@ -243,6 +393,8 @@ static struct subcommand cluster_cmd[] = {
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
 	{"recover", NULL, "afph", "manually recover the cluster",
 	 0, cluster_recover},
+	{"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
+	 0, cluster_snapshot},
 	{NULL,},
 };
 
@@ -270,6 +422,20 @@ static int cluster_parser(int ch, char *opt)
 	case 'f':
 		cluster_cmd_data.force = 1;
 		break;
+	case 'R':
+		cluster_cmd_data.epoch = strtol(opt, &p, 10);
+		if (opt == p) {
+			fprintf(stderr, "The epoch must be an integer\n");
+			exit(EXIT_FAILURE);
+		}
+		if (cluster_cmd_data.epoch < 1) {
+			fprintf(stderr, "The epoch must be greater than 0\n");
+			exit(EXIT_FAILURE);
+		}
+		break;
+	case 'l':
+		cluster_cmd_data.list = 1;
+		break;
 	}
 
 	return 0;
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..b3eb0b1 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -44,6 +44,8 @@ static const struct sd_option collie_options[] = {
 	{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
 nodes for the configured redundancy"},
 	{'f', "force", 0, "do not prompt for confirmation"},
+	{'R', "restore", 1, "restore the cluster"},
+	{'l', "list", 0, "list the user epoch information"},
 
 	{ 0, NULL, 0, NULL },
 };
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..8789095 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -22,6 +22,7 @@
 #define SD_OP_CREATE_AND_WRITE_OBJ 0x01
 #define SD_OP_READ_OBJ 0x02
 #define SD_OP_WRITE_OBJ 0x03
+#define SD_OP_REMOVE_OBJ 0x04
 
 #define SD_OP_NEW_VDI 0x11
 #define SD_OP_LOCK_VDI 0x12
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index c71eaf2..db7f070 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -528,10 +528,10 @@ static int farm_get_snap_file(struct siocb *iocb)
 	size_t size;
 	int nr;
 
-	dprintf("try get snap file\n");
 	buffer = snap_log_read(&nr, 1);
 	if (!buffer)
 		goto out;
+	dprintf("get snap file, nr %d\n", nr);
 	size = nr * sizeof(struct snap_log);
 	memcpy(iocb->buf, buffer, size);
 	iocb->length = size;
diff --git a/sheep/ops.c b/sheep/ops.c
index 5300039..46d2445 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -400,9 +400,9 @@ static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
 static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
-	const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
+	const struct sd_obj_req *hdr = (const struct sd_obj_req *)req;
 	int ret = SD_RES_SUCCESS;
-	struct siocb iocb = { .epoch = hdr->epoch };
+	struct siocb iocb = { .epoch = hdr->tgt_epoch };
 
 	if (store.restore)
 		ret = store.restore(&iocb);
@@ -410,7 +410,7 @@ static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
-static int cluster_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
+static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
 	int ret = SD_RES_SUCCESS;
@@ -485,11 +485,6 @@ static struct sd_op_template sd_ops[] = {
 		.force = 1,
 		.process_main = cluster_restore,
 	},
-	[SD_OP_SNAP_FILE] = {
-		.type = SD_OP_TYPE_CLUSTER,
-		.force = 1,
-		.process_main = cluster_get_snap_file,
-	},
 
 	/* local operations */
 	[SD_OP_READ_VDIS] = {
@@ -531,6 +526,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_work = local_get_epoch,
 	},
 
+	[SD_OP_SNAP_FILE] = {
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_work = local_get_snap_file,
+	},
+
 	/* I/O operations */
 	[SD_OP_CREATE_AND_WRITE_OBJ] = {
 		.type = SD_OP_TYPE_IO,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 4110881..04cd4df 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -21,8 +21,6 @@
 #include "sheep.h"
 #include "cluster.h"
 
-#define SD_OP_REMOVE_OBJ 0x91
-
 #define SD_OP_GET_OBJ_LIST 0xA1
 #define SD_OP_GET_EPOCH 0XA2
 
-- 
1.7.8.rc3