[Sheepdog] [PATCH v5 15/17] collie: enable cluster-wide snapshot command

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Wed Jan 4 22:30:23 CET 2012


At Fri, 30 Dec 2011 21:07:10 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly at taobao.com>
> 
> Usage:
> $collie cluster snapshot # snapshot the whole cluster
> $collie cluster snapshot -l # list user snapshot info
> $collie cluster snapshot -R epoch # restore to state of targeted snapshot
> 
> For example, the script below:
> #!/bin/bash
> 
> pkill sheep
> rm store/* -rf
> for i in 0 1 2; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i;sleep 1;done
> collie/collie cluster format -b farm
> qemu-img create -f raw sheepdog:test 1G
> qemu-io -c "write -P 0x1 0 4M" sheepdog:test
> collie/collie cluster snapshot # Index 1
> qemu-io -c "write -P 0x2 4M 4M" sheepdog:test
> collie/collie cluster snapshot # 2
> qemu-io -c "write -P 0x3 8M 4M" sheepdog:test
> collie/collie cluster snapshot # 3
> collie/collie cluster snapshot -l
> collie/collie cluster snapshot -R 2
> ============================================================
> OUTPUT:
> Formatting 'sheepdog:test', fmt=raw size=1073741824
> wrote 4194304/4194304 bytes at offset 0
> 4 MiB, 1 ops; 0.0000 sec (8.142 MiB/sec and 2.0354 ops/sec)
> wrote 4194304/4194304 bytes at offset 4194304
> 4 MiB, 1 ops; 0.0000 sec (7.987 MiB/sec and 1.9968 ops/sec)
> wrote 4194304/4194304 bytes at offset 8388608
> 4 MiB, 1 ops; 0.0000 sec (9.381 MiB/sec and 2.3452 ops/sec)
> Index		Snapshot Time
> 1		Fri Dec 23 22:21:05 2011
> 2		Fri Dec 23 22:21:08 2011
> 3		Fri Dec 23 22:21:11 2011
> Cluster restore to the snapshot 2
> ...
> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  collie/cluster.c         |  159 ++++++++++++++++++++++++++++++++++++++++++++++
>  collie/collie.c          |    2 +
>  include/sheepdog_proto.h |    1 +
>  sheep/sheep_priv.h       |    2 -
>  4 files changed, 162 insertions(+), 2 deletions(-)

I found the following bugs:

 - If I took a cluster-wide snapshot just after cluster format, I
   couldn't restore it.

    $ collie cluster format -b farm
    $ collie cluster snapshot   
    $ qemu-img create sheepdog:test 128G
    Formatting 'sheepdog:test', fmt=raw size=137438953472 
    $ collie cluster snapshot -l
    Index           Snapshot Time
    1               Thu Jan  5 06:34:36 2012
    $ collie cluster snapshot -R 1
    Restore failed: I/O error

 - 'collie vdi list' shows wrong information after restoring a
   snapshot.

    $ collie cluster format -b farm
    $ qemu-img create sheepdog:test 128G
    Formatting 'sheepdog:test', fmt=raw size=137438953472 
    $ collie cluster snapshot     
    $ qemu-img create sheepdog:test2 128G
    Formatting 'sheepdog:test2', fmt=raw size=137438953472 
    $ collie cluster snapshot -R 1
    Cluster restore to the snapshot 1
    $ collie vdi list
      Name        Id    Size    Used  Shared    Creation time   VDI id
      test         1  128 GB  0.0 MB  0.0 MB 2012-01-05 06:37   7c2b25
    Failed to read object 80fd381500000000 No object found
    Failed to read inode header

> 
> diff --git a/collie/cluster.c b/collie/cluster.c
> index 9c18e02..3124a4b 100644
> --- a/collie/cluster.c
> +++ b/collie/cluster.c
> @@ -15,8 +15,11 @@
>  #include <sys/time.h>
>  
>  #include "collie.h"
> +#include "../sheep/farm.h"
>  
>  struct cluster_cmd_data {
> +	int epoch;
> +	int list;
>  	int copies;
>  	int nohalt;
>  	int force;
> @@ -231,6 +234,146 @@ static int cluster_shutdown(int argc, char **argv)
>  	return EXIT_SUCCESS;
>  }
>  
> +static int restore_snap(int epoch)
> +{
> +	int fd, ret;
> +	struct sd_obj_req hdr;
> +	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
> +	unsigned rlen, wlen;
> +
> +	fd = connect_to(sdhost, sdport);
> +	if (fd < 0)
> +		return EXIT_SYSFAIL;
> +
> +	memset(&hdr, 0, sizeof(hdr));
> +
> +	hdr.opcode = SD_OP_RESTORE;
> +	hdr.tgt_epoch = epoch;
> +
> +	rlen = 0;
> +	wlen = 0;
> +	ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen);
> +	close(fd);
> +
> +	if (ret) {
> +		fprintf(stderr, "Failed to connect\n");
> +		return EXIT_SYSFAIL;
> +	}
> +
> +	if (rsp->result != SD_RES_SUCCESS) {
> +		fprintf(stderr, "Restore failed: %s\n",
> +				sd_strerror(rsp->result));
> +		return EXIT_FAILURE;
> +	}
> +
> +	printf("Cluster restore to the snapshot %d\n", epoch);
> +	return EXIT_SUCCESS;
> +}
> +
> +static void print_list(void *buf, unsigned len)
> +{
> +	struct snap_log *log_buf = (struct snap_log *)buf;
> +	unsigned nr = len / sizeof (struct snap_log), i;
> +
> +	printf("Index\t\tSnapshot Time\n");
> +	for (i = 0; i < nr; i++, log_buf++) {
> +		time_t *t = (time_t *)&log_buf->time;
> +		printf("%d\t\t", log_buf->epoch);
> +		printf("%s", ctime(t));
> +	}
> +}
> +
> +static int list_snap(void)
> +{
> +	int fd, ret = EXIT_SYSFAIL;
> +	struct sd_req hdr;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +	unsigned rlen, wlen;
> +	void *buf;
> +
> +	buf = malloc(SD_DATA_OBJ_SIZE);
> +	if (!buf)
> +		return EXIT_SYSFAIL;
> +
> +	fd = connect_to(sdhost, sdport);
> +	if (fd < 0) {
> +		goto out;
> +	}

Remove redundant braces.

> +
> +	memset(&hdr, 0, sizeof(hdr));
> +
> +	wlen = 0;
> +	rlen = SD_DATA_OBJ_SIZE;
> +	hdr.opcode = SD_OP_GET_SNAP_FILE;
> +	hdr.data_length = rlen;
> +
> +	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
> +	close(fd);
> +
> +	if (ret) {
> +		fprintf(stderr, "Failed to connect\n");
> +		goto out;
> +	}
> +
> +	if (rsp->result != SD_RES_SUCCESS) {
> +		fprintf(stderr, "Listing snapshots failed: %s\n",
> +				sd_strerror(rsp->result));
> +		ret = EXIT_FAILURE;
> +		goto out;
> +	}
> +
> +	print_list(buf, rlen);
> +out:
> +	free(buf);
> +	return EXIT_SUCCESS;
> +}
> +
> +static int do_snapshot(void)
> +{
> +	int fd, ret;
> +	struct sd_req hdr;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +	unsigned rlen, wlen;
> +
> +	fd = connect_to(sdhost, sdport);
> +	if (fd < 0)
> +		return EXIT_SYSFAIL;
> +
> +	memset(&hdr, 0, sizeof(hdr));
> +
> +	hdr.opcode = SD_OP_SNAPSHOT;
> +
> +	rlen = 0;
> +	wlen = 0;
> +	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
> +	close(fd);
> +
> +	if (ret) {
> +		fprintf(stderr, "Failed to connect\n");
> +		return EXIT_SYSFAIL;
> +	}
> +
> +	if (rsp->result != SD_RES_SUCCESS) {
> +		fprintf(stderr, "Snapshot failed: %s\n",
> +				sd_strerror(rsp->result));
> +		return EXIT_FAILURE;
> +	}
> +
> +	return EXIT_SUCCESS;
> +}
> +
> +static int cluster_snapshot(int argc, char **argv)
> +{
> +	int ret, epoch = cluster_cmd_data.epoch;
> +	if (epoch)
> +		ret = restore_snap(epoch);
> +	else if(cluster_cmd_data.list)

Needs a space between 'if' and '('.

Thanks,

Kazutaka



More information about the sheepdog mailing list