[Sheepdog] [PATCH v2] collie: add an operation to show nodes in recovery

Li Wenpeng levin108 at gmail.com
Wed Apr 25 05:29:33 CEST 2012


From: levin li <xingke.lwp at taobao.com>

It is useful to be able to list the nodes that are in recovery when
debugging or maintaining a cluster, because we often need to know
which nodes are still recovering. Until now we had to check the logs
to find out whether the cluster had finished recovery; with this
patch it is much easier.

usage:

#collie node recovery

output:

Nodes In Recovery:
  Id   Host:Port         V-Nodes       Zone
   4   127.0.0.1:7006         64          6
   5   127.0.0.1:7007         64          7
   8   127.0.0.1:7010         64         10
   9   127.0.0.1:7011         64         11
  10   127.0.0.1:7012         64         12

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
Sorry, I forgot to update the commit log in the previous version of this patch.

thanks

diff --git a/collie/node.c b/collie/node.c
index a8804f8..ef33b71 100644
--- a/collie/node.c
+++ b/collie/node.c
@@ -116,11 +116,56 @@ static int node_info(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+/* Query each listed node with SD_OP_STAT_RECOVERY; print those in recovery. */
+static int node_recovery(int argc, char **argv)
+{
+	int i, ret;
+
+	if (!raw_output) {
+		printf("Nodes In Recovery:\n");
+		printf("  Id   Host:Port         V-Nodes       Zone\n");
+	}
+
+	for (i = 0; i < nr_nodes; i++) {
+		char host[128];
+		int fd;
+		unsigned wlen, rlen;
+		struct sd_node_req req;
+		struct sd_node_rsp *rsp = (struct sd_node_rsp *)&req;
+		/* Port 0: presumably yields the bare address -- TODO confirm. */
+		addr_to_str(host, sizeof(host), node_list_entries[i].addr, 0);
+		/* A node that cannot be reached aborts the whole command. */
+		fd = connect_to(host, node_list_entries[i].port);
+		if (fd < 0)
+			return EXIT_FAILURE;
+
+		memset(&req, 0, sizeof(req));
+		req.opcode = SD_OP_STAT_RECOVERY;
+		/* NULL data, zero lengths; rsp points at req's own storage. */
+		wlen = 0;
+		rlen = 0;
+		ret = exec_req(fd, (struct sd_req *)&req, NULL, &wlen, &rlen);
+		close(fd);
+		/* Only SD_RES_SUCCESS is printed; failures are silently skipped. */
+		if (!ret && rsp->result == SD_RES_SUCCESS) {
+			addr_to_str(host, sizeof(host),
+					node_list_entries[i].addr, node_list_entries[i].port);
+			printf(raw_output ? "%d %s %d %d\n" : "%4d   %-20s%5d%11d\n",
+				   i, host, node_list_entries[i].nr_vnodes,
+				   node_list_entries[i].zone);
+		}
+	}
+
+	return EXIT_SUCCESS;
+}
+
 static struct subcommand node_cmd[] = {
 	{"list", NULL, "aprh", "list nodes",
 	 SUBCMD_FLAG_NEED_NODELIST, node_list},
 	{"info", NULL, "aprh", "show information about each node",
 	 SUBCMD_FLAG_NEED_NODELIST, node_info},
+	{"recovery", NULL, "aprh", "show nodes in recovery",
+	 SUBCMD_FLAG_NEED_NODELIST, node_recovery},
 	{NULL,},
 };
 
diff --git a/include/sheep.h b/include/sheep.h
index fc2ac58..7e287c4 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -45,6 +45,7 @@
 #define SD_OP_CLEANUP        0x94
 #define SD_OP_TRACE          0x95
 #define SD_OP_TRACE_CAT      0x96
+#define SD_OP_STAT_RECOVERY  0x97
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
diff --git a/sheep/ops.c b/sheep/ops.c
index 99bde61..815c268 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -289,6 +289,17 @@ static int local_stat_sheep(const struct sd_req *req, struct sd_rsp *rsp,
 	return stat_sheep(&node_rsp->store_size, &node_rsp->store_free, epoch);
 }
 
+/* Report SD_RES_SUCCESS while this node is recovering, else SD_RES_UNKNOWN. */
+static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
+					void *data)
+{
+	if (node_in_recovery())
+		return SD_RES_SUCCESS;
+
+	/* Not recovering: collie lists only nodes that answer SD_RES_SUCCESS. */
+	return SD_RES_UNKNOWN;
+}
+
 static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
 			      void *data)
 {
@@ -631,6 +642,11 @@ static struct sd_op_template sd_ops[] = {
 		.process_work = local_stat_sheep,
 	},
 
+	[SD_OP_STAT_RECOVERY] = {
+		.type = SD_OP_TYPE_LOCAL,
+		.process_main = local_stat_recovery,
+	},
+
 	[SD_OP_STAT_CLUSTER] = {
 		.type = SD_OP_TYPE_LOCAL,
 		.force = 1,
-- 
1.7.1




More information about the sheepdog mailing list