[Sheepdog] [PATCH] collie: add an operation to show nodes in recovery
Liu Yuan
namei.unix at gmail.com
Wed Apr 25 04:10:06 CEST 2012
On 04/23/2012 11:32 AM, Li Wenpeng wrote:
> From: levin li <xingke.lwp at taobao.com>
>
> It's useful to show nodes in recovery when we are debugging
> or maintaining the cluster. Often we want to know
> which nodes are in recovery; until now, we had to check the
> logs to find out whether the cluster had finished recovery,
> but with this patch it's easier.
>
> usage:
>
> #collie node recovery
>
> output:
>
> Nodes In Recovery:
> M Id Host:Port V-Nodes Zone
> - 4 127.0.0.1:7006 64 6
> - 5 127.0.0.1:7007 64 7
> - 8 127.0.0.1:7010 64 10
> - 9 127.0.0.1:7011 64 11
> - 10 127.0.0.1:7012 64 12
>
Why do we need to show M (I guess it denotes mastership)? It seems that you
copied this scheme from 'node list'... but actually, the M column needs to be
removed.
Thanks,
Yuan
> Signed-off-by: levin li <xingke.lwp at taobao.com>
> ---
> collie/node.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> include/sheep.h | 1 +
> sheep/ops.c | 16 ++++++++++++++++
> 3 files changed, 71 insertions(+), 0 deletions(-)
>
> diff --git a/collie/node.c b/collie/node.c
> index a8804f8..e669197 100644
> --- a/collie/node.c
> +++ b/collie/node.c
> @@ -116,11 +116,65 @@ static int node_info(int argc, char **argv)
> return EXIT_SUCCESS;
> }
>
> +static int node_recovery(int argc, char **argv)
> +{
> + int i, ret;
> +
> + if (!raw_output) {
> + printf("Nodes In Recovery:\n");
> + printf("M Id Host:Port V-Nodes Zone\n");
> + }
> +
> + for (i = 0; i < nr_nodes; i++) {
> + char host[128];
> + int fd;
> + unsigned wlen, rlen;
> + struct sd_node_req req;
> + struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req;
> +
> + addr_to_str(host, sizeof(host), node_list_entries[i].addr, 0);
> +
> + fd = connect_to(host, node_list_entries[i].port);
> + if (fd < 0)
> + return EXIT_FAILURE;
> +
> + memset(&req, 0, sizeof(req));
> +
> + req.opcode = SD_OP_STAT_RECOVERY;
> +
> + wlen = 0;
> + rlen = 0;
> + ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen);
> + close(fd);
> +
> + if (!ret && rsp->result == SD_RES_SUCCESS) {
> + addr_to_str(host, sizeof(host),
> + node_list_entries[i].addr, node_list_entries[i].port);
> + if (i == master_idx) {
> + if (highlight)
> + printf(TEXT_BOLD);
> + printf(raw_output ? "* %d %s %d %d\n" : "* %4d %-20s\t%2d%11d\n",
> + i, host, node_list_entries[i].nr_vnodes,
> + node_list_entries[i].zone);
> + if (highlight)
> + printf(TEXT_NORMAL);
> + } else
> + printf(raw_output ? "- %d %s %d %d\n" : "- %4d %-20s\t%2d%11d\n",
> + i, host, node_list_entries[i].nr_vnodes,
> + node_list_entries[i].zone);
> + }
> + }
> +
> + return EXIT_SUCCESS;
> +}
> +
> static struct subcommand node_cmd[] = {
> {"list", NULL, "aprh", "list nodes",
> SUBCMD_FLAG_NEED_NODELIST, node_list},
> {"info", NULL, "aprh", "show information about each node",
> SUBCMD_FLAG_NEED_NODELIST, node_info},
> + {"recovery", NULL, "aprh", "show nodes in recovery",
> + SUBCMD_FLAG_NEED_NODELIST, node_recovery},
> {NULL,},
> };
>
> diff --git a/include/sheep.h b/include/sheep.h
> index fc2ac58..7e287c4 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -45,6 +45,7 @@
> #define SD_OP_CLEANUP 0x94
> #define SD_OP_TRACE 0x95
> #define SD_OP_TRACE_CAT 0x96
> +#define SD_OP_STAT_RECOVERY 0x97
>
> #define SD_FLAG_CMD_IO_LOCAL 0x0010
> #define SD_FLAG_CMD_RECOVERY 0x0020
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 99bde61..815c268 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -289,6 +289,17 @@ static int local_stat_sheep(const struct sd_req *req, struct sd_rsp *rsp,
> return stat_sheep(&node_rsp->store_size, &node_rsp->store_free, epoch);
> }
>
> +static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
> + void *data)
> +{
> + if (node_in_recovery())
> + return SD_RES_SUCCESS;
> + else
> + return SD_RES_UNKNOWN;
> +
> + return SD_RES_UNKNOWN;
> +}
> +
> static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> @@ -631,6 +642,11 @@ static struct sd_op_template sd_ops[] = {
> .process_work = local_stat_sheep,
> },
>
> + [SD_OP_STAT_RECOVERY] = {
> + .type = SD_OP_TYPE_LOCAL,
> + .process_main = local_stat_recovery,
> + },
> +
> [SD_OP_STAT_CLUSTER] = {
> .type = SD_OP_TYPE_LOCAL,
> .force = 1,
More information about the sheepdog
mailing list