[Sheepdog] [PATCH 3/3 UPDATE] sheep: add SD_OP_RECOVER operation

Wed Oct 26 05:32:10 CEST 2011

At Wed, 26 Oct 2011 10:23:53 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly at taobao.com>
> 
> With this patch, the manual recovery command starts working.
> 
> [Test]
> 
> script1:
> 
> for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
> collie/collie cluster format -H
> collie/collie cluster shutdown;
> sleep 1
> # node 1 2 permanently down
> for i in 0; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
> for i in 0; do ./collie/collie cluster info -p 700$i; done
> collie/collie cluster recover
> for i in 0; do ./collie/collie cluster info -p 700$i; done
> 
> for i in 3 4; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
> for i in 0 3 4; do ./collie/collie cluster info -p 700$i; done
> 
> output:
> 
> root at taobao:/home/dev/sheepdog# ./test2.sh
> Cluster status: Waiting for other nodes joining
> 
> Creation time        Epoch Nodes
> Cluster status: running
> 
> Creation time        Epoch Nodes
> 2011-10-22 02:18:49      2 [192.168.0.1:7000]
> 2011-10-22 02:18:49      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
> Cluster status: running
> 
> Creation time        Epoch Nodes
> 2011-10-22 02:18:49      4 [192.168.0.1:7000, 192.168.0.1:7003, 192.168.0.1:7004]
> 2011-10-22 02:18:49      3 [192.168.0.1:7000, 192.168.0.1:7003]
> 2011-10-22 02:18:49      2 [192.168.0.1:7000]
> 2011-10-22 02:18:49      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
> ....
> 
> script2:
> 
> for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
> collie/collie cluster format
> for i in 0 1 2; do pkill -f "sheep -d /store/$i"; sleep 1; done
> # master node 2 permanently down
> for i in 0 1; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
> for i in 1; do ./collie/collie cluster info -p 700$i; done
> collie/collie cluster recover -p 7001
> for i in 1; do ./collie/collie cluster info -p 700$i; done
> 
> for i in 0 3; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
> for i in 0 1 3; do ./collie/collie cluster info -p 700$i; done
> 
> output:
> 
> Cluster status: Waiting for other nodes joining
> 
> Creation time        Epoch Nodes
> Cluster status: The sheepdog is stopped doing IO, short of living nodes
> 
> Creation time        Epoch Nodes
> Cluster status: running
> 
> Creation time        Epoch Nodes
> 2011-10-22 01:59:36      5 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7003]
> 2011-10-22 01:59:36      4 [192.168.0.1:7000, 192.168.0.1:7001]
> 2011-10-22 01:59:36      3 [192.168.0.1:7001]
> 2011-10-22 01:59:36      2 [192.168.0.1:7001, 192.168.0.1:7002]
> 2011-10-22 01:59:36      1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
> ...
> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
> Kazum, this fixes a minor uninitialized-variable warning.
> please apply this one.

Applied this patchset, thanks!

Kazutaka

> 
> Yuan
> 
>  include/sheep.h          |    1 +
>  include/sheepdog_proto.h |    1 +
>  sheep/ops.c              |   55 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 57 insertions(+), 0 deletions(-)
> 
> diff --git a/include/sheep.h b/include/sheep.h
> index 46ecf96..072ea7a 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -261,6 +261,7 @@ static inline const char *sd_strerror(int err)
>  		{SD_RES_WAIT_FOR_JOIN, "Waiting for other nodes joining"},
>  		{SD_RES_JOIN_FAILED, "The node had failed to join sheepdog"},
>  		{SD_RES_HALT, "The sheepdog is stopped doing IO, short of living nodes"},
> +		{SD_RES_MANUAL_RECOVER, "We should not maunally recover the running/halted cluster"},
>  
>  		{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
>  		{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 976a5f4..65aeef3 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -59,6 +59,7 @@
>  #define SD_RES_WAIT_FOR_JOIN    0x17 /* Sheepdog is waiting for other nodes joining */
>  #define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
>  #define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
> +#define SD_RES_MANUAL_RECOVER   0x1A /* Users should not manually recover this cluster */
>  
>  /*
>   * Object ID rules
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 8ce9b0e..2d9b241 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -335,6 +335,55 @@ static int local_get_epoch(const struct sd_req *req, struct sd_rsp *rsp,
>  	return ret;
>  }
>  
> +static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp,
> +				void *data)
> +{
> +	int s, nr_zones = 0, ret = SD_RES_SUCCESS;
> +	uint8_t c;
> +	uint16_t f;
> +
> +	/* We should manually recover the cluster when
> +	 * 1) the master is physically down (different epoch condition).
> +	 * 2) some nodes are physically down (same epoch condition).
> +	 * In both case, the nodes(s) stat is WAIT_FOR_JOIN.
> +	 */
> +	if (!sys_stat_wait_join()) {
> +		ret = SD_RES_MANUAL_RECOVER;
> +		goto out;
> +	}
> +
> +	ret = get_cluster_copies(&c);
> +	if (ret)
> +		goto out;
> +	ret = get_cluster_flags(&f);
> +	if (ret)
> +		goto out;
> +
> +	sys->nr_sobjs = c;
> +	sys->flags = f;
> +
> +	s = SD_STATUS_OK;
> +	if (!sys_flag_nohalt()) {
> +		nr_zones = get_zones_nr_from(sys->nodes, sys->nr_nodes);
> +		if (nr_zones < sys->nr_sobjs)
> +			s = SD_STATUS_HALT;
> +	}
> +
> +	dprintf("flags %d, nr_zones %d, copies %d\n", sys->flags, nr_zones, sys->nr_sobjs);
> +
> +	sys->epoch++; /* some nodes are left, so we get a new epoch */
> +	ret = update_epoch_log(sys->epoch);
> +	if (ret) {
> +		ret = SD_RES_EIO;
> +		sys->epoch--;
> +		goto out;
> +	}
> +	update_epoch_store(sys->epoch);
> +	sys_stat_set(s);
> +out:
> +	return ret;
> +}
> +
>  static struct sd_op_template sd_ops[] = {
>  
>  	/* cluster operations */
> @@ -379,6 +428,12 @@ static struct sd_op_template sd_ops[] = {
>  		.type = SD_OP_TYPE_CLUSTER,
>  	},
>  
> +	[SD_OP_RECOVER] = {
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.force = 1,
> +		.process_main = cluster_manual_recover,
> +	},
> +
>  	/* local operations */
>  	[SD_OP_READ_VDIS] = {
>  		.type = SD_OP_TYPE_LOCAL,
> -- 
> 1.7.6.1
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog