[sheepdog] [PATCH 1/3] collie: add delay_recovery {start|stop} command

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Mon Jul 30 06:43:59 CEST 2012


At Sun, 29 Jul 2012 22:29:20 +0800,
Yunkai Zhang wrote:
> 
> From: Yunkai Zhang <qiushu.zyk at taobao.com>
> 
> = What is delay recovery =
> With delay recovery, we can add (or kill) multiple nodes to (or from) the
> cluster at leisure; only one recovery operation is triggered at the end of
> delay recovery, which helps to reduce fluctuation in the cluster.
> 
> = Usage =
> 1) Start delay recovery transaction:
>  $ collie delay_recovery start
>  Delay recovery start ...
> 
> 2) Add(kill) multiple nodes into(from) cluster
>  ...
> 
> 3) Stop delay recovery transaction:
>  $ collie delay_recovery stop
>  Delay recovery stop.
> 
> This patch only implements the command-line tools to update the internal
> status; the next patch will do the real work of delay recovery.
> 
> Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
> ---
>  collie/Makefile.am       |  3 ++-
>  collie/collie.c          |  1 +
>  collie/collie.h          |  1 +
>  collie/delay_recovery.c  | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/internal_proto.h |  2 ++
>  include/sheepdog_proto.h |  4 +++
>  sheep/ops.c              | 26 +++++++++++++++++++
>  sheep/sheep_priv.h       |  1 +
>  8 files changed, 102 insertions(+), 1 deletion(-)
>  create mode 100644 collie/delay_recovery.c

We use the first argument of the collie command as a target of the
operation.  I prefer 'collie cluster recovery <operation>'.

Thanks,

Kazutaka


> 
> diff --git a/collie/Makefile.am b/collie/Makefile.am
> index d98b709..e7821b1 100644
> --- a/collie/Makefile.am
> +++ b/collie/Makefile.am
> @@ -23,7 +23,8 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include
>  
>  sbin_PROGRAMS		= collie
>  
> -collie_SOURCES		= collie.c common.c treeview.c vdi.c node.c cluster.c
> +collie_SOURCES		= collie.c common.c treeview.c vdi.c node.c cluster.c \
> +			  delay_recovery.c
>  
>  if BUILD_TRACE
>  collie_SOURCES          += debug.c
> diff --git a/collie/collie.c b/collie/collie.c
> index fda9a59..7d30537 100644
> --- a/collie/collie.c
> +++ b/collie/collie.c
> @@ -285,6 +285,7 @@ int main(int argc, char **argv)
>  		vdi_command,
>  		node_command,
>  		cluster_command,
> +		delay_recovery_command,
>  		debug_command,
>  		{NULL,}
>  	};
> diff --git a/collie/collie.h b/collie/collie.h
> index 75a675c..32a0368 100644
> --- a/collie/collie.h
> +++ b/collie/collie.h
> @@ -76,6 +76,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port);
>  extern struct command vdi_command;
>  extern struct command node_command;
>  extern struct command cluster_command;
> +extern struct command delay_recovery_command;
>  
>  #ifdef ENABLE_TRACE
>    extern struct command debug_command;
> diff --git a/collie/delay_recovery.c b/collie/delay_recovery.c
> new file mode 100644
> index 0000000..2cd4841
> --- /dev/null
> +++ b/collie/delay_recovery.c
> @@ -0,0 +1,65 @@
> +/*
> + * Copyright (C) 2011 Taobao Inc.
> + *
> + * Yunkai Zhang <yunkai.me at gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <time.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <sys/time.h>
> +
> +#include "collie.h"
> +
> +static int delay_recovery_start(int argc, char **argv)
> +{
> +	int ret;
> +	struct sd_req hdr;
> +
> +	sd_init_req(&hdr, SD_OP_START_DELAY_RECOVERY);
> +	hdr.epoch = sd_epoch;
> +
> +	ret = send_light_req(&hdr, sdhost, sdport);
> +	if (ret)
> +		return EXIT_FAILURE;
> +
> +	printf("Delay recovery start ...\n");
> +	return EXIT_SUCCESS;
> +}
> +
> +static int delay_recovery_stop(int argc, char **argv)
> +{
> +	int ret;
> +	struct sd_req hdr;
> +
> +	sd_init_req(&hdr, SD_OP_STOP_DELAY_RECOVERY);
> +	hdr.epoch = sd_epoch;
> +
> +	ret = send_light_req(&hdr, sdhost, sdport);
> +	if (ret)
> +		return EXIT_FAILURE;
> +
> +	printf("Delay recovery stop.\n");
> +	return EXIT_SUCCESS;
> +}
> +
> +static struct subcommand delay_recovery_cmd[] = {
> +	{"start", NULL, "aph", "start delay recovery",
> +	 0, delay_recovery_start},
> +	{"stop", NULL, "aph", "stop delay recovery",
> +	 0, delay_recovery_stop},
> +	{NULL,},
> +};
> +
> +struct command delay_recovery_command = {
> +	"delay_recovery",
> +	delay_recovery_cmd,
> +	NULL
> +};
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index abc14dc..d6a7989 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -60,6 +60,8 @@
>  #define SD_OP_READ_PEER      0xA4
>  #define SD_OP_WRITE_PEER     0xA5
>  #define SD_OP_REMOVE_PEER    0xA6
> +#define SD_OP_STOP_DELAY_RECOVERY 0xA7
> +#define SD_OP_START_DELAY_RECOVERY 0xA8
>  
>  /* internal flags for hdr.flags, must be above 0x80 */
>  #define SD_FLAG_CMD_RECOVERY 0x0080
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 45a4b81..5a306a6 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -104,6 +104,10 @@
>  
>  #define STORE_LEN 16
>  
> +/* status for delay recovery */
> +#define SD_DELAY_RECOVERY_STOP 0
> +#define SD_DELAY_RECOVERY_START 1
> +
>  struct sd_req {
>  	uint8_t		proto_ver;
>  	uint8_t		opcode;
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 8aa6b34..c0fa98e 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -260,6 +260,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
>  	return SD_RES_SUCCESS;
>  }
>  
> +static int cluster_start_delay_recovery(const struct sd_req *req,
> +				    struct sd_rsp *rsp, void *data)
> +{
> +	sys->delay_recovery = SD_DELAY_RECOVERY_START;
> +	return SD_RES_SUCCESS;
> +}
> +
> +static int cluster_stop_delay_recovery(const struct sd_req *req,
> +				   struct sd_rsp *rsp, void *data)
> +{
> +	sys->delay_recovery = SD_DELAY_RECOVERY_STOP;
> +	return SD_RES_SUCCESS;
> +}
> +
>  static int cluster_get_vdi_attr(struct request *req)
>  {
>  	const struct sd_req *hdr = &req->rq;
> @@ -1000,6 +1014,18 @@ static struct sd_op_template sd_ops[] = {
>  		.type = SD_OP_TYPE_PEER,
>  		.process_work = peer_remove_obj,
>  	},
> +
> +	[SD_OP_START_DELAY_RECOVERY] = {
> +		.name = "START_DELAY_RECOVERY",
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process_main = cluster_start_delay_recovery,
> +	},
> +
> +	[SD_OP_STOP_DELAY_RECOVERY] = {
> +		.name = "STOP_DELAY_RECOVERY",
> +		.type = SD_OP_TYPE_CLUSTER,
> +		.process_main = cluster_stop_delay_recovery,
> +	},
>  };
>  
>  struct sd_op_template *get_sd_op(uint8_t opcode)
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 530fe14..22d9d6c 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -116,6 +116,7 @@ struct cluster_info {
>  
>  	int use_directio;
>  	uint8_t gateway_only;
> +	uint8_t delay_recovery;
>  
>  	struct work_queue *gateway_wqueue;
>  	struct work_queue *io_wqueue;
> -- 
> 1.7.11.2
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list