From: Liu Yuan <tailai.ly at taobao.com>

This command is supposed to shut down the specified node gracefully.

usage:
 $ collie node kill node_id

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/node.c            |   37 +++++++++++++++++++++++++++++++++++++
 include/internal_proto.h |    2 ++
 include/sheepdog_proto.h |    1 +
 sheep/ops.c              |   14 ++++++++++++++
 sheep/request.c          |    4 ++++
 sheep/sheep.c            |    4 +++-
 6 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/collie/node.c b/collie/node.c
index a778605..87cf97b 100644
--- a/collie/node.c
+++ b/collie/node.c
@@ -157,7 +157,44 @@ static int node_recovery(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+static int node_kill(int argc, char **argv)
+{
+	char host[128];
+	int fd, node_id, ret;
+	unsigned wlen, rlen;
+	struct sd_node_req req;
+	struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req;
+
+	node_id = strtol(argv[optind++], NULL, 10);
+	if (node_id < 0 || node_id >= sd_nodes_nr) {
+		fprintf(stderr, "Invalid node id '%d'\n", node_id);
+		exit(EXIT_USAGE);
+	}
+
+	addr_to_str(host, sizeof(host), sd_nodes[node_id].nid.addr, 0);
+
+	fd = connect_to(host, sd_nodes[node_id].nid.port);
+	if (fd < 0)
+		return EXIT_FAILURE;
+
+	sd_init_req((struct sd_req *)&req, SD_OP_KILL_NODE);
+
+	wlen = 0;
+	rlen = 0;
+	ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret || rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Failed to execute request\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return EXIT_SUCCESS;
+}
+
 static struct subcommand node_cmd[] = {
+	{"kill", "<node id>", "aprh", "kill node",
+	 SUBCMD_FLAG_NEED_NODELIST|SUBCMD_FLAG_NEED_THIRD_ARG, node_kill},
 	{"list", NULL, "aprh", "list nodes",
 	 SUBCMD_FLAG_NEED_NODELIST, node_list},
 	{"info", NULL, "aprh", "show information about each node",
diff --git a/include/internal_proto.h b/include/internal_proto.h
index f08816f..abc14dc 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -53,6 +53,7 @@
 #define SD_OP_STAT_RECOVERY 0x97
 #define SD_OP_FLUSH_DEL_CACHE 0x98
 #define SD_OP_NOTIFY_VDI_DEL 0x99
+#define SD_OP_KILL_NODE 0x9A
 #define SD_OP_GET_OBJ_LIST 0xA1
 #define SD_OP_GET_EPOCH 0xA2
 #define SD_OP_CREATE_AND_WRITE_PEER 0xA3
@@ -84,6 +85,7 @@
 #define SD_STATUS_WAIT_FOR_JOIN 0x00000004
 #define SD_STATUS_SHUTDOWN 0x00000008
 #define SD_STATUS_HALT 0x00000020
+#define SD_STATUS_KILLED 0x00000040
 
 struct sd_so_req {
 	uint8_t proto_ver;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index e06d830..45a4b81 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -67,6 +67,7 @@
 #define SD_RES_NO_SUPPORT 0x21 /* Operation is not supported by backend store */
 #define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */
 #define SD_RES_OBJ_RECOVERING 0x23 /* Object is recovering */
+#define SD_RES_KILLED 0x24 /* Node is killed */
 
 /* errors above 0x80 are sheepdog-internal */
 
diff --git a/sheep/ops.c b/sheep/ops.c
index 6802aea..638f57b 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -556,6 +556,13 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
 	return SD_RES_SUCCESS;
 }
 
+static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp, void *data)
+{
+	sys_stat_set(SD_STATUS_KILLED);
+
+	return SD_RES_SUCCESS;
+}
+
 static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
 				  uint64_t oid, char *buf)
 {
@@ -935,6 +942,13 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = local_trace_cat_ops,
 	},
 
+	[SD_OP_KILL_NODE] = {
+		.name = "KILL_NODE",
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_main = local_kill_node,
+	},
+
 	/* gateway I/O operations */
 	[SD_OP_CREATE_AND_WRITE_OBJ] = {
 		.name = "CREATE_AND_WRITE_OBJ",
diff --git a/sheep/request.c b/sheep/request.c
index 35ac488..1a2005e 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -77,6 +77,7 @@ static void gateway_op_done(struct work *work)
 	case SD_RES_NETWORK_ERROR:
 	case SD_RES_WAIT_FOR_JOIN:
 	case SD_RES_WAIT_FOR_FORMAT:
+	case SD_RES_KILLED:
 		dprintf("retrying failed I/O request "
 			"op %s result %d epoch %d, sys epoch %d\n",
 			op_name(req->op),
@@ -329,6 +330,9 @@ static void queue_request(struct request *req)
 	dprintf("%s\n", op_name(req->op));
 
 	switch (sys->status) {
+	case SD_STATUS_KILLED:
+		rsp->result = SD_RES_KILLED;
+		goto done;
 	case SD_STATUS_SHUTDOWN:
 		rsp->result = SD_RES_SHUTDOWN;
 		goto done;
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 2e208de..53da29d 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -314,7 +314,9 @@ int main(int argc, char **argv)
 	vprintf(SDOG_NOTICE, "sheepdog daemon (version %s) started\n",
 		PACKAGE_VERSION);
 
-	while (!sys_stat_shutdown() || sys->nr_outstanding_reqs != 0)
+	while (sys->nr_outstanding_reqs != 0 ||
+	       (sys->status != SD_STATUS_KILLED &&
+		sys->status != SD_STATUS_SHUTDOWN))
 		event_loop(-1);
 
 	vprintf(SDOG_INFO, "shutdown\n");
-- 
1.7.10.2