[sheepdog] [PATCH 1/3] sheep: add a kill node operation

Liu Yuan namei.unix at gmail.com
Mon Jul 23 05:23:34 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

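This patch adds a "node kill" command to collie that shuts down the
specified node gracefully: once killed, the node answers newly queued
requests with SD_RES_KILLED (which gateways treat as a retryable error)
and the daemon leaves its event loop as soon as no requests are
outstanding.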

Usage:
 $ collie node kill <node id>

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/node.c            |   37 +++++++++++++++++++++++++++++++++++++
 include/internal_proto.h |    2 ++
 include/sheepdog_proto.h |    1 +
 sheep/ops.c              |   14 ++++++++++++++
 sheep/request.c          |    4 ++++
 sheep/sheep.c            |    4 +++-
 6 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/collie/node.c b/collie/node.c
index a778605..87cf97b 100644
--- a/collie/node.c
+++ b/collie/node.c
@@ -157,7 +157,44 @@ static int node_recovery(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+static int node_kill(int argc, char **argv)
+{
+	char host[128];
+	int fd, node_id, ret;
+	unsigned wlen, rlen;
+	struct sd_node_req req;
+	struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req;
+
+	node_id = strtol(argv[optind++], NULL, 10);
+	if (node_id < 0 || node_id >= sd_nodes_nr) {
+		fprintf(stderr, "Invalid node id '%d'\n", node_id);
+		exit(EXIT_USAGE);
+	}
+
+	addr_to_str(host, sizeof(host), sd_nodes[node_id].nid.addr, 0);
+
+	fd = connect_to(host, sd_nodes[node_id].nid.port);
+	if (fd < 0)
+		return EXIT_FAILURE;
+
+	sd_init_req((struct sd_req *)&req, SD_OP_KILL_NODE);
+
+	wlen = 0;
+	rlen = 0;
+	ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret || rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Failed to execute request\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return EXIT_SUCCESS;
+}
+
 static struct subcommand node_cmd[] = {
+	{"kill", "<node id>", "aprh", "kill node",
+	 SUBCMD_FLAG_NEED_NODELIST|SUBCMD_FLAG_NEED_THIRD_ARG, node_kill},
 	{"list", NULL, "aprh", "list nodes",
 	 SUBCMD_FLAG_NEED_NODELIST, node_list},
 	{"info", NULL, "aprh", "show information about each node",
diff --git a/include/internal_proto.h b/include/internal_proto.h
index f08816f..abc14dc 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -53,6 +53,7 @@
 #define SD_OP_STAT_RECOVERY  0x97
 #define SD_OP_FLUSH_DEL_CACHE  0x98
 #define SD_OP_NOTIFY_VDI_DEL 0x99
+#define SD_OP_KILL_NODE      0x9A
 #define SD_OP_GET_OBJ_LIST   0xA1
 #define SD_OP_GET_EPOCH      0xA2
 #define SD_OP_CREATE_AND_WRITE_PEER 0xA3
@@ -84,6 +85,7 @@
 #define SD_STATUS_WAIT_FOR_JOIN     0x00000004
 #define SD_STATUS_SHUTDOWN          0x00000008
 #define SD_STATUS_HALT              0x00000020
+#define SD_STATUS_KILLED            0x00000040
 
 struct sd_so_req {
 	uint8_t		proto_ver;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index e06d830..45a4b81 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -67,6 +67,7 @@
 #define SD_RES_NO_SUPPORT       0x21 /* Operation is not supported by backend store */
 #define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */
 #define SD_RES_OBJ_RECOVERING     0x23 /* Object is recovering */
+#define SD_RES_KILLED           0x24 /* Node is killed */
 
 /* errors above 0x80 are sheepdog-internal */
 
diff --git a/sheep/ops.c b/sheep/ops.c
index 6802aea..638f57b 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -556,6 +556,13 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
 	return SD_RES_SUCCESS;
 }
 
+static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp, void *data)
+{
+	sys_stat_set(SD_STATUS_KILLED);
+
+	return SD_RES_SUCCESS;
+}
+
 static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
 				  uint64_t oid, char *buf)
 {
@@ -935,6 +942,13 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = local_trace_cat_ops,
 	},
 
+	[SD_OP_KILL_NODE] = {
+		.name = "KILL_NODE",
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_main = local_kill_node,
+	},
+
 	/* gateway I/O operations */
 	[SD_OP_CREATE_AND_WRITE_OBJ] = {
 		.name = "CREATE_AND_WRITE_OBJ",
diff --git a/sheep/request.c b/sheep/request.c
index 35ac488..1a2005e 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -77,6 +77,7 @@ static void gateway_op_done(struct work *work)
 	case SD_RES_NETWORK_ERROR:
 	case SD_RES_WAIT_FOR_JOIN:
 	case SD_RES_WAIT_FOR_FORMAT:
+	case SD_RES_KILLED:
 		dprintf("retrying failed I/O request "
 			"op %s result %d epoch %d, sys epoch %d\n",
 			op_name(req->op),
@@ -329,6 +330,9 @@ static void queue_request(struct request *req)
 	dprintf("%s\n", op_name(req->op));
 
 	switch (sys->status) {
+	case SD_STATUS_KILLED:
+		rsp->result = SD_RES_KILLED;
+		goto done;
 	case SD_STATUS_SHUTDOWN:
 		rsp->result = SD_RES_SHUTDOWN;
 		goto done;
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 2e208de..53da29d 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -314,7 +314,9 @@ int main(int argc, char **argv)
 
 	vprintf(SDOG_NOTICE, "sheepdog daemon (version %s) started\n", PACKAGE_VERSION);
 
-	while (!sys_stat_shutdown() || sys->nr_outstanding_reqs != 0)
+	while (sys->nr_outstanding_reqs != 0 ||
+	       (sys->status != SD_STATUS_KILLED &&
+		sys->status != SD_STATUS_SHUTDOWN))
 		event_loop(-1);
 
 	vprintf(SDOG_INFO, "shutdown\n");
-- 
1.7.10.2




More information about the sheepdog mailing list