[Sheepdog] [PATCH v2 1/3] collie: add manual recover subcommand for cluster

Mon Oct 24 11:19:15 CEST 2011

From: Liu Yuan <tailai.ly at taobao.com>

Currently, the sheepdog cluster cannot be recovered under the following
conditions:

1) the master node is physically down after the cluster crashes with
   different epochs during recovery.
2) some of the nodes are physically down after the cluster is shut down
   during recovery.

This patch adds a manual recovery mechanism. With this patch, you can manually
recover the cluster from any live node by running:

$ collie cluster recover
This prompts you with a warning; type "yes" to proceed.

or if you know well what you are doing, you can
$ collie cluster recover -f

[Use with Caution]

This command will increment cluster epoch by 1!

For case 1), you need to try to start up the nodes in sequence for the first
round until the master node is up, thanks to the mastership mechanism. If that
unfortunately does not work, you can simply run the recover command. After
that, you can freely join the other good nodes.

For case 2), you'd better try to start up all the nodes to see whether any of
them is physically down. If so, you can simply run the recover command.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/cluster.c |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 collie/collie.c  |    1 +
 include/sheep.h  |    1 +
 3 files changed, 66 insertions(+), 0 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index adc3b5f..2f0e66f 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -10,6 +10,8 @@
  */
 
 #include <time.h>
+#include <string.h>
+#include <ctype.h>
 #include <sys/time.h>
 
 #include "collie.h"
@@ -17,6 +19,7 @@
 struct cluster_cmd_data {
 	int copies;
 	int nohalt;
+	int force;
 } cluster_cmd_data;
 
 static void set_nohalt(uint16_t *p)
@@ -169,6 +172,62 @@ static int cluster_shutdown(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+#define RECOVER_PRINT \
+"CAUTION!Please assure me that you have tried booting up all the\n" \
+"cluster nodes before you run this command.\n\n" \
+"In two cases you need to recover the cluster manually:\n" \
+"\t1) The master node is failed to boot in different epoch condition.\n" \
+"\t2) Some nodes are failed to boot after the cluster is shutdown-ed.\n" \
+"\nPlease type to continue [Yes/No]: "
+
+/*
+ * "collie cluster recover": send SD_OP_RECOVER to sdhost:sdport, after a "yes"
+ * confirmation unless -f was given.  Returns EXIT_SUCCESS (also on decline),
+ * EXIT_SYSFAIL on prompt/request failure, EXIT_FAILURE on an error response.
+ */
+static int cluster_recover(int argc, char **argv)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen = 0, wlen = 0;
+	char str[123] = {'\0'};
+
+	/* Ask before connecting so that no early return can leak the socket. */
+	if (!cluster_cmd_data.force) {
+		size_t i;
+
+		fputs(RECOVER_PRINT, stdout);
+		fflush(stdout);
+		/* The field width keeps the token inside str[]. */
+		if (scanf("%122s", str) != 1)
+			return EXIT_SYSFAIL;
+		for (i = 0; str[i] != '\0'; i++)
+			str[i] = tolower((unsigned char)str[i]);
+		if (strncmp(str, "yes", 3) != 0)
+			return EXIT_SUCCESS;
+	}
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_RECOVER;
+	hdr.epoch = node_list_version;
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+	if (ret) {
+		fprintf(stderr, "failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "%s\n", sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprh", "show cluster information",
 	 0, cluster_info},
@@ -176,6 +235,8 @@ static struct subcommand cluster_cmd[] = {
 	 0, cluster_format},
 	{"shutdown", NULL, "aph", "stop Sheepdog",
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
+	{"recover", NULL, "afph", "manually recover the cluster",
+	0, cluster_recover},
 	{NULL,},
 };
 
@@ -197,6 +258,9 @@ static int cluster_parser(int ch, char *opt)
 	case 'H':
 		cluster_cmd_data.nohalt = 1;
 		break;
+	case 'f':
+		cluster_cmd_data.force = 1;
+		break;
 	}
 
 	return 0;
diff --git a/collie/collie.c b/collie/collie.c
index 456f2cd..fbe9956 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -42,6 +42,7 @@ static const struct sd_option collie_options[] = {
 	/* cluster options */
 	{'c', "copies", 1, "set the number of data redundancy"},
 	{'H', "nohalt", 0, "serve the IO rquests even lack of enough redundant nodes"},
+	{'f', "force", 0, "never promt, you know well what you are doing"},
 
 	{ 0, NULL, 0, NULL },
 };
diff --git a/include/sheep.h b/include/sheep.h
index e06d34b..46ecf96 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -36,6 +36,7 @@
 #define SD_OP_STAT_CLUSTER   0x87
 #define SD_OP_KILL_NODE      0x88
 #define SD_OP_GET_VDI_ATTR   0x89
+#define SD_OP_RECOVER	     0x8A
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
-- 
1.7.6.1