[Sheepdog] [PATCH v5 15/17] collie: enable cluster-wide snapshot command
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed Jan 4 22:30:23 CET 2012
At Fri, 30 Dec 2011 21:07:10 +0800,
Liu Yuan wrote:
>
> From: Liu Yuan <tailai.ly at taobao.com>
>
> Usage:
> $collie cluster snapshot # snapshot the whole cluster
> $collie cluster snapshot -l # list user snapshot info
> $collie cluster snapshot -R epoch # restore to state of targeted snapshot
>
> For e.g. below script
> #!/bin/bash
>
> pkill sheep
> rm store/* -rf
> for i in 0 1 2; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i;sleep 1;done
> collie/collie cluster format -b farm
> qemu-img create -f raw sheepdog:test 1G
> qemu-io -c "write -P 0x1 0 4M" sheepdog:test
> collie/collie cluster snapshot # Index 1
> qemu-io -c "write -P 0x2 4M 4M" sheepdog:test
> collie/collie cluster snapshot # 2
> qemu-io -c "write -P 0x3 8M 4M" sheepdog:test
> collie/collie cluster snapshot # 3
> collie/collie cluster snapshot -l
> collie/collie cluster snapshot -R 2
> ============================================================
> OUTPUT:
> Formatting 'sheepdog:test', fmt=raw size=1073741824
> wrote 4194304/4194304 bytes at offset 0
> 4 MiB, 1 ops; 0.0000 sec (8.142 MiB/sec and 2.0354 ops/sec)
> wrote 4194304/4194304 bytes at offset 4194304
> 4 MiB, 1 ops; 0.0000 sec (7.987 MiB/sec and 1.9968 ops/sec)
> wrote 4194304/4194304 bytes at offset 8388608
> 4 MiB, 1 ops; 0.0000 sec (9.381 MiB/sec and 2.3452 ops/sec)
> Index Snapshot Time
> 1 Fri Dec 23 22:21:05 2011
> 2 Fri Dec 23 22:21:08 2011
> 3 Fri Dec 23 22:21:11 2011
> Cluster restore to the snapshot 2
> ...
>
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
> collie/cluster.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++
> collie/collie.c | 2 +
> include/sheepdog_proto.h | 1 +
> sheep/sheep_priv.h | 2 -
> 4 files changed, 162 insertions(+), 2 deletions(-)
I found some bugs while testing this patch:
- When I took a cluster-wide snapshot just after formatting the cluster,
I couldn't restore it:
$ collie cluster format -b farm
$ collie cluster snapshot
$ qemu-img create sheepdog:test 128G
Formatting 'sheepdog:test', fmt=raw size=137438953472
$ collie cluster snapshot -l
Index Snapshot Time
1 Thu Jan 5 06:34:36 2012
$ collie cluster snapshot -R 1
Restore failed: I/O error
- 'collie vdi list' shows wrong information after restoring a
snapshot.
$ collie cluster format -b farm
$ qemu-img create sheepdog:test 128G
Formatting 'sheepdog:test', fmt=raw size=137438953472
$ collie cluster snapshot
$ qemu-img create sheepdog:test2 128G
Formatting 'sheepdog:test2', fmt=raw size=137438953472
$ collie cluster snapshot -R 1
Cluster restore to the snapshot 1
$ collie vdi list
Name Id Size Used Shared Creation time VDI id
test 1 128 GB 0.0 MB 0.0 MB 2012-01-05 06:37 7c2b25
Failed to read object 80fd381500000000 No object found
Failed to read inode header
>
> diff --git a/collie/cluster.c b/collie/cluster.c
> index 9c18e02..3124a4b 100644
> --- a/collie/cluster.c
> +++ b/collie/cluster.c
> @@ -15,8 +15,11 @@
> #include <sys/time.h>
>
> #include "collie.h"
> +#include "../sheep/farm.h"
>
> struct cluster_cmd_data {
> + int epoch;
> + int list;
> int copies;
> int nohalt;
> int force;
> @@ -231,6 +234,146 @@ static int cluster_shutdown(int argc, char **argv)
> return EXIT_SUCCESS;
> }
>
> +static int restore_snap(int epoch)
> +{
> + int fd, ret;
> + struct sd_obj_req hdr;
> + struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
> + unsigned rlen, wlen;
> +
> + fd = connect_to(sdhost, sdport);
> + if (fd < 0)
> + return EXIT_SYSFAIL;
> +
> + memset(&hdr, 0, sizeof(hdr));
> +
> + hdr.opcode = SD_OP_RESTORE;
> + hdr.tgt_epoch = epoch;
> +
> + rlen = 0;
> + wlen = 0;
> + ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen);
> + close(fd);
> +
> + if (ret) {
> + fprintf(stderr, "Failed to connect\n");
> + return EXIT_SYSFAIL;
> + }
> +
> + if (rsp->result != SD_RES_SUCCESS) {
> + fprintf(stderr, "Restore failed: %s\n",
> + sd_strerror(rsp->result));
> + return EXIT_FAILURE;
> + }
> +
> + printf("Cluster restore to the snapshot %d\n", epoch);
> + return EXIT_SUCCESS;
> +}
> +
> +static void print_list(void *buf, unsigned len)
> +{
> + struct snap_log *log_buf = (struct snap_log *)buf;
> + unsigned nr = len / sizeof (struct snap_log), i;
> +
> + printf("Index\t\tSnapshot Time\n");
> + for (i = 0; i < nr; i++, log_buf++) {
> + time_t *t = (time_t *)&log_buf->time;
> + printf("%d\t\t", log_buf->epoch);
> + printf("%s", ctime(t));
> + }
> +}
> +
> +static int list_snap(void)
> +{
> + int fd, ret = EXIT_SYSFAIL;
> + struct sd_req hdr;
> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> + unsigned rlen, wlen;
> + void *buf;
> +
> + buf = malloc(SD_DATA_OBJ_SIZE);
> + if (!buf)
> + return EXIT_SYSFAIL;
> +
> + fd = connect_to(sdhost, sdport);
> + if (fd < 0) {
> + goto out;
> + }
Please remove the redundant braces around the single-statement 'goto out;'.
> +
> + memset(&hdr, 0, sizeof(hdr));
> +
> + wlen = 0;
> + rlen = SD_DATA_OBJ_SIZE;
> + hdr.opcode = SD_OP_GET_SNAP_FILE;
> + hdr.data_length = rlen;
> +
> + ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
> + close(fd);
> +
> + if (ret) {
> + fprintf(stderr, "Failed to connect\n");
> + goto out;
> + }
> +
> + if (rsp->result != SD_RES_SUCCESS) {
> + fprintf(stderr, "Listing snapshots failed: %s\n",
> + sd_strerror(rsp->result));
> + ret = EXIT_FAILURE;
> + goto out;
> + }
> +
> + print_list(buf, rlen);
> +out:
> + free(buf);
> + return EXIT_SUCCESS;
> +}
> +
> +static int do_snapshot(void)
> +{
> + int fd, ret;
> + struct sd_req hdr;
> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> + unsigned rlen, wlen;
> +
> + fd = connect_to(sdhost, sdport);
> + if (fd < 0)
> + return EXIT_SYSFAIL;
> +
> + memset(&hdr, 0, sizeof(hdr));
> +
> + hdr.opcode = SD_OP_SNAPSHOT;
> +
> + rlen = 0;
> + wlen = 0;
> + ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
> + close(fd);
> +
> + if (ret) {
> + fprintf(stderr, "Failed to connect\n");
> + return EXIT_SYSFAIL;
> + }
> +
> + if (rsp->result != SD_RES_SUCCESS) {
> + fprintf(stderr, "Snapshot failed: %s\n",
> + sd_strerror(rsp->result));
> + return EXIT_FAILURE;
> + }
> +
> + return EXIT_SUCCESS;
> +}
> +
> +static int cluster_snapshot(int argc, char **argv)
> +{
> + int ret, epoch = cluster_cmd_data.epoch;
> + if (epoch)
> + ret = restore_snap(epoch);
> + else if(cluster_cmd_data.list)
Needs a space between 'if' and '(', i.e. 'else if (cluster_cmd_data.list)'.
Thanks,
Kazutaka
More information about the sheepdog
mailing list