[Sheepdog] [PATCH v2] collie: add an operation to show nodes in recovery

Liu Yuan namei.unix at gmail.com
Wed Apr 25 08:06:04 CEST 2012


On 04/25/2012 11:29 AM, Li Wenpeng wrote:

> From: levin li <xingke.lwp at taobao.com>
> 
> It's useful to show nodes in recovery when we are debugging
> or maintaining the cluster. Many times we want to know
> which nodes are in recovery. Before this, we had to check the
> logs to find out whether the cluster had finished recovery,
> but with this patch, it's easier.
> 
> usage:
> 
> #collie node recovery
> 
> output:
> 
> Nodes In Recovery:
>  Id   Host:Port         V-Nodes       Zone
>   4   127.0.0.1:7006     	64          6
>   5   127.0.0.1:7007      	64          7
>   8   127.0.0.1:7010      	64         10
>   9   127.0.0.1:7011      	64         11
>  10   127.0.0.1:7012      	64         12
> 
> Signed-off-by: levin li <xingke.lwp at taobao.com>
> ---
> Sorry, I forgot to update the commit log in the previous version of this patch.
> 
> thanks
> 
> diff --git a/collie/node.c b/collie/node.c
> index a8804f8..ef33b71 100644
> --- a/collie/node.c
> +++ b/collie/node.c
> @@ -116,11 +116,56 @@ static int node_info(int argc, char **argv)
>  	return EXIT_SUCCESS;
>  }
>  
> +static int node_recovery(int argc, char **argv)
> +{
> +	int i, ret;
> +
> +	if (!raw_output) {
> +		printf("Nodes In Recovery:\n");
> +		printf("  Id   Host:Port         V-Nodes       Zone\n");
> +	}
> +
> +	for (i = 0; i < nr_nodes; i++) {
> +		char host[128];
> +		int fd;
> +		unsigned wlen, rlen;
> +		struct sd_node_req req;
> +		struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req;
> +
> +		addr_to_str(host, sizeof(host), node_list_entries[i].addr, 0);
> +
> +		fd = connect_to(host, node_list_entries[i].port);
> +		if (fd < 0)
> +			return EXIT_FAILURE;
> +
> +		memset(&req, 0, sizeof(req));
> +
> +		req.opcode = SD_OP_STAT_RECOVERY;
> +
> +		wlen = 0;
> +		rlen = 0;
> +		ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen);
> +		close(fd);
> +
> +		if (!ret && rsp->result == SD_RES_SUCCESS) {
> +			addr_to_str(host, sizeof(host),
> +					node_list_entries[i].addr, node_list_entries[i].port);
> +			printf(raw_output ? "%d %s %d %d\n" : "%4d   %-20s%5d%11d\n",
> +				   i, host, node_list_entries[i].nr_vnodes,
> +				   node_list_entries[i].zone);
> +		}
> +	}
> +
> +	return EXIT_SUCCESS;
> +}
> +
>  static struct subcommand node_cmd[] = {
>  	{"list", NULL, "aprh", "list nodes",
>  	 SUBCMD_FLAG_NEED_NODELIST, node_list},
>  	{"info", NULL, "aprh", "show information about each node",
>  	 SUBCMD_FLAG_NEED_NODELIST, node_info},
> +	{"recovery", NULL, "aprh", "show nodes in recovery",
> +	 SUBCMD_FLAG_NEED_NODELIST, node_recovery},
>  	{NULL,},
>  };
>  
> diff --git a/include/sheep.h b/include/sheep.h
> index fc2ac58..7e287c4 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -45,6 +45,7 @@
>  #define SD_OP_CLEANUP        0x94
>  #define SD_OP_TRACE          0x95
>  #define SD_OP_TRACE_CAT      0x96
> +#define SD_OP_STAT_RECOVERY  0x97
>  
>  #define SD_FLAG_CMD_IO_LOCAL   0x0010
>  #define SD_FLAG_CMD_RECOVERY 0x0020
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 99bde61..815c268 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -289,6 +289,17 @@ static int local_stat_sheep(const struct sd_req *req, struct sd_rsp *rsp,
>  	return stat_sheep(&node_rsp->store_size, &node_rsp->store_free, epoch);
>  }
>  
> +static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
> +					void *data)
> +{
> +	if (node_in_recovery())
> +		return SD_RES_SUCCESS;
> +	else
> +		return SD_RES_UNKNOWN;
> +
> +	return SD_RES_UNKNOWN;
> +}
> +
>  static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
>  			      void *data)
>  {
> @@ -631,6 +642,11 @@ static struct sd_op_template sd_ops[] = {
>  		.process_work = local_stat_sheep,
>  	},
>  
> +	[SD_OP_STAT_RECOVERY] = {
> +		.type = SD_OP_TYPE_LOCAL,
> +		.process_main = local_stat_recovery,
> +	},
> +
>  	[SD_OP_STAT_CLUSTER] = {
>  		.type = SD_OP_TYPE_LOCAL,
>  		.force = 1,


Applied

Thanks,
Yuan



More information about the sheepdog mailing list