[Sheepdog] [PATCH] collie: add an operation to show nodes in recovery

Liu Yuan namei.unix at gmail.com
Wed Apr 25 04:10:06 CEST 2012


On 04/23/2012 11:32 AM, Li Wenpeng wrote:

> From: levin li <xingke.lwp at taobao.com>
> 
> It's useful to show which nodes are in recovery when we are
> debugging or maintaining the cluster. Many times we want to know
> which nodes are in recovery; before this patch, we had to check
> the logs to find out whether the cluster had finished recovery,
> but with this patch it's easier.
> 
> usage:
> 
> #collie node recovery
> 
> output:
> 
> Nodes In Recovery:
> M   Id   Host:Port         V-Nodes       Zone
> -    4   127.0.0.1:7006      	64          6
> -    5   127.0.0.1:7007      	64          7
> -    8   127.0.0.1:7010      	64         10
> -    9   127.0.0.1:7011      	64         11
> -   10   127.0.0.1:7012      	64         12
> 


Why do we need to show M (I guess it denotes mastership)? It seems that you
copied this scheme from 'node list'... but actually, the M column needs to be
removed.

Thanks,
Yuan

> Signed-off-by: levin li <xingke.lwp at taobao.com>
> ---
>  collie/node.c   |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/sheep.h |    1 +
>  sheep/ops.c     |   16 ++++++++++++++++
>  3 files changed, 71 insertions(+), 0 deletions(-)
> 
> diff --git a/collie/node.c b/collie/node.c
> index a8804f8..e669197 100644
> --- a/collie/node.c
> +++ b/collie/node.c
> @@ -116,11 +116,65 @@ static int node_info(int argc, char **argv)
>  	return EXIT_SUCCESS;
>  }
>  
> +static int node_recovery(int argc, char **argv)
> +{
> +	int i, ret;
> +
> +	if (!raw_output) {
> +		printf("Nodes In Recovery:\n");
> +		printf("M   Id   Host:Port         V-Nodes       Zone\n");
> +	}
> +
> +	for (i = 0; i < nr_nodes; i++) {
> +		char host[128];
> +		int fd;
> +		unsigned wlen, rlen;
> +		struct sd_node_req req;
> +		struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req;
> +
> +		addr_to_str(host, sizeof(host), node_list_entries[i].addr, 0);
> +
> +		fd = connect_to(host, node_list_entries[i].port);
> +		if (fd < 0)
> +			return EXIT_FAILURE;
> +
> +		memset(&req, 0, sizeof(req));
> +
> +		req.opcode = SD_OP_STAT_RECOVERY;
> +
> +		wlen = 0;
> +		rlen = 0;
> +		ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen);
> +		close(fd);
> +
> +		if (!ret && rsp->result == SD_RES_SUCCESS) {
> +			addr_to_str(host, sizeof(host),
> +					node_list_entries[i].addr, node_list_entries[i].port);
> +			if (i == master_idx) {
> +				if (highlight)
> +					printf(TEXT_BOLD);
> +				printf(raw_output ? "* %d %s %d %d\n" : "* %4d   %-20s\t%2d%11d\n",
> +					   i, host, node_list_entries[i].nr_vnodes,
> +					   node_list_entries[i].zone);
> +				if (highlight)
> +					printf(TEXT_NORMAL);
> +			} else
> +				printf(raw_output ? "- %d %s %d %d\n" : "- %4d   %-20s\t%2d%11d\n",
> +					   i, host, node_list_entries[i].nr_vnodes,
> +					   node_list_entries[i].zone);
> +		}
> +	}
> +
> +	return EXIT_SUCCESS;
> +}
> +
>  static struct subcommand node_cmd[] = {
>  	{"list", NULL, "aprh", "list nodes",
>  	 SUBCMD_FLAG_NEED_NODELIST, node_list},
>  	{"info", NULL, "aprh", "show information about each node",
>  	 SUBCMD_FLAG_NEED_NODELIST, node_info},
> +	{"recovery", NULL, "aprh", "show nodes in recovery",
> +	 SUBCMD_FLAG_NEED_NODELIST, node_recovery},
>  	{NULL,},
>  };
>  
> diff --git a/include/sheep.h b/include/sheep.h
> index fc2ac58..7e287c4 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -45,6 +45,7 @@
>  #define SD_OP_CLEANUP        0x94
>  #define SD_OP_TRACE          0x95
>  #define SD_OP_TRACE_CAT      0x96
> +#define SD_OP_STAT_RECOVERY  0x97
>  
>  #define SD_FLAG_CMD_IO_LOCAL   0x0010
>  #define SD_FLAG_CMD_RECOVERY 0x0020
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 99bde61..815c268 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -289,6 +289,17 @@ static int local_stat_sheep(const struct sd_req *req, struct sd_rsp *rsp,
>  	return stat_sheep(&node_rsp->store_size, &node_rsp->store_free, epoch);
>  }
>  
> +static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
> +					void *data)
> +{
> +	if (node_in_recovery())
> +		return SD_RES_SUCCESS;
> +	else
> +		return SD_RES_UNKNOWN;
> +
> +	return SD_RES_UNKNOWN;
> +}
> +
>  static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
>  			      void *data)
>  {
> @@ -631,6 +642,11 @@ static struct sd_op_template sd_ops[] = {
>  		.process_work = local_stat_sheep,
>  	},
>  
> +	[SD_OP_STAT_RECOVERY] = {
> +		.type = SD_OP_TYPE_LOCAL,
> +		.process_main = local_stat_recovery,
> +	},
> +
>  	[SD_OP_STAT_CLUSTER] = {
>  		.type = SD_OP_TYPE_LOCAL,
>  		.force = 1,





More information about the sheepdog mailing list