On 04/23/2012 11:32 AM, Li Wenpeng wrote: > From: levin li <xingke.lwp at taobao.com> > > It's useful to show nodes in recovery when we are debugging > or maintaining the cluster, many times we want to know > which nodes are in recovery, before this, we check the > logs to find out whether the cluster has ended recovery, > but with this patch, it's easier. > > usage: > > #collie node recovery > > output: > > Nodes In Recovery: > M Id Host:Port V-Nodes Zone > - 4 127.0.0.1:7006 64 6 > - 5 127.0.0.1:7007 64 7 > - 8 127.0.0.1:7010 64 10 > - 9 127.0.0.1:7011 64 11 > - 10 127.0.0.1:7012 64 12 > Why do we need to show M (I guess it denotes mastership)? It seems that you copied this scheme from 'node list'... but actually, the M column needs to be removed. Thanks, Yuan > Signed-off-by: levin li <xingke.lwp at taobao.com> > --- > collie/node.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > include/sheep.h | 1 + > sheep/ops.c | 16 ++++++++++++++++ > 3 files changed, 71 insertions(+), 0 deletions(-) > > diff --git a/collie/node.c b/collie/node.c > index a8804f8..e669197 100644 > --- a/collie/node.c > +++ b/collie/node.c > @@ -116,11 +116,65 @@ static int node_info(int argc, char **argv) > return EXIT_SUCCESS; > } > > +static int node_recovery(int argc, char **argv) > +{ > + int i, ret; > + > + if (!raw_output) { > + printf("Nodes In Recovery:\n"); > + printf("M Id Host:Port V-Nodes Zone\n"); > + } > + > + for (i = 0; i < nr_nodes; i++) { > + char host[128]; > + int fd; > + unsigned wlen, rlen; > + struct sd_node_req req; > + struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req; > + > + addr_to_str(host, sizeof(host), node_list_entries[i].addr, 0); > + > + fd = connect_to(host, node_list_entries[i].port); > + if (fd < 0) > + return EXIT_FAILURE; > + > + memset(&req, 0, sizeof(req)); > + > + req.opcode = SD_OP_STAT_RECOVERY; > + > + wlen = 0; > + rlen = 0; > + ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen); > + close(fd); > + > + if (!ret && rsp->result == 
SD_RES_SUCCESS) { > + addr_to_str(host, sizeof(host), > + node_list_entries[i].addr, node_list_entries[i].port); > + if (i == master_idx) { > + if (highlight) > + printf(TEXT_BOLD); > + printf(raw_output ? "* %d %s %d %d\n" : "* %4d %-20s\t%2d%11d\n", > + i, host, node_list_entries[i].nr_vnodes, > + node_list_entries[i].zone); > + if (highlight) > + printf(TEXT_NORMAL); > + } else > + printf(raw_output ? "- %d %s %d %d\n" : "- %4d %-20s\t%2d%11d\n", > + i, host, node_list_entries[i].nr_vnodes, > + node_list_entries[i].zone); > + } > + } > + > + return EXIT_SUCCESS; > +} > + > static struct subcommand node_cmd[] = { > {"list", NULL, "aprh", "list nodes", > SUBCMD_FLAG_NEED_NODELIST, node_list}, > {"info", NULL, "aprh", "show information about each node", > SUBCMD_FLAG_NEED_NODELIST, node_info}, > + {"recovery", NULL, "aprh", "show nodes in recovery", > + SUBCMD_FLAG_NEED_NODELIST, node_recovery}, > {NULL,}, > }; > > diff --git a/include/sheep.h b/include/sheep.h > index fc2ac58..7e287c4 100644 > --- a/include/sheep.h > +++ b/include/sheep.h > @@ -45,6 +45,7 @@ > #define SD_OP_CLEANUP 0x94 > #define SD_OP_TRACE 0x95 > #define SD_OP_TRACE_CAT 0x96 > +#define SD_OP_STAT_RECOVERY 0x97 > > #define SD_FLAG_CMD_IO_LOCAL 0x0010 > #define SD_FLAG_CMD_RECOVERY 0x0020 > diff --git a/sheep/ops.c b/sheep/ops.c > index 99bde61..815c268 100644 > --- a/sheep/ops.c > +++ b/sheep/ops.c > @@ -289,6 +289,17 @@ static int local_stat_sheep(const struct sd_req *req, struct sd_rsp *rsp, > return stat_sheep(&node_rsp->store_size, &node_rsp->store_free, epoch); > } > > +static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp, > + void *data) > +{ > + if (node_in_recovery()) > + return SD_RES_SUCCESS; > + else > + return SD_RES_UNKNOWN; > + > + return SD_RES_UNKNOWN; > +} > + > static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp, > void *data) > { > @@ -631,6 +642,11 @@ static struct sd_op_template sd_ops[] = { > .process_work = local_stat_sheep, > 
}, > > + [SD_OP_STAT_RECOVERY] = { > + .type = SD_OP_TYPE_LOCAL, > + .process_main = local_stat_recovery, > + }, > + > [SD_OP_STAT_CLUSTER] = { > .type = SD_OP_TYPE_LOCAL, > .force = 1, |