[sheepdog] [PATCH V2 2/4] collie: add cluster recover {enable|disable} command

Yunkai Zhang yunkai.me at gmail.com
Tue Jul 31 11:05:53 CEST 2012


From: Yunkai Zhang <qiushu.zyk at taobao.com>

= Why we need to disable recovery =
After disabling recovery, we can add multiple nodes to the cluster at leisure;
only one recovery operation is triggered when recovery is enabled again, which
helps to reduce fluctuation in the cluster.

PS: recovery is enabled by default.

= Usage =
1) Disable cluster recovery:
   $ collie cluster recover disable
    *Note*: Only disable the recovery caused by JOIN envets
    Cluster recovery: disable

2) Add multiple nodes into cluster
   ...

3) Stop delaying recovery by enabling it again:
   $ collie cluster recover enable
     Cluster recovery: enable

This patch only implements the command-line tool that updates the internal
status flag; the next patch will do the real work.

Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
---
 collie/cluster.c         | 39 +++++++++++++++++++++++++++++++++++++++
 include/internal_proto.h |  2 ++
 sheep/ops.c              | 26 ++++++++++++++++++++++++++
 sheep/sheep_priv.h       |  1 +
 4 files changed, 68 insertions(+)

diff --git a/collie/cluster.c b/collie/cluster.c
index 9ddd726..9e18a37 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -379,10 +379,49 @@ static int cluster_force_recover(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+static int cluster_disable_recover(int argc, char **argv)
+{
+	int ret;
+	struct sd_req hdr;
+
+	sd_init_req(&hdr, SD_OP_DISABLE_RECOVER);
+	hdr.epoch = sd_epoch;
+
+	ret = send_light_req(&hdr, sdhost, sdport);
+	if (ret)
+		return EXIT_FAILURE;
+
+	printf("*Note*: Only disable the recovery caused by JOIN envets\n"
+	       "Cluster recovery: disable\n");
+	return EXIT_SUCCESS;
+}
+
+static int cluster_enable_recover(int argc, char **argv)
+{
+	int ret;
+	struct sd_req hdr;
+
+	sd_init_req(&hdr, SD_OP_ENABLE_RECOVER);
+	hdr.epoch = sd_epoch;
+
+	ret = send_light_req(&hdr, sdhost, sdport);
+	if (ret)
+		return EXIT_FAILURE;
+
+	printf("Cluster recovery: enable\n");
+	return EXIT_SUCCESS;
+}
+
 /* Subcommand list of recover */
 static struct subcommand cluster_recover_cmd[] = {
 	{"force", NULL, NULL, "force recover cluster immediately",
 	 NULL, 0, cluster_force_recover},
+	{"enable", NULL, NULL, "enable automatic recovery and "
+				"run once recover if necessary",
+	 NULL, 0, cluster_enable_recover},
+	{"disable", NULL, NULL, "disable automatic recovery caused "
+				"by JOIN events (excluding LEAVE events now)",
+	 NULL, 0, cluster_disable_recover},
 	{NULL},
 };
 
diff --git a/include/internal_proto.h b/include/internal_proto.h
index a6e54b8..1651f9c 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -61,6 +61,8 @@
 #define SD_OP_WRITE_PEER     0xA5
 #define SD_OP_REMOVE_PEER    0xA6
 #define SD_OP_SET_CACHE_SIZE 0xA7
+#define SD_OP_ENABLE_RECOVER 0xA8
+#define SD_OP_DISABLE_RECOVER 0xA9
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index 75df906..c6e33ce 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -267,6 +267,20 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
 	return SD_RES_SUCCESS;
 }
 
+static int cluster_enable_recover(const struct sd_req *req,
+				    struct sd_rsp *rsp, void *data)
+{
+	sys->disable_recovery = 0;
+	return SD_RES_SUCCESS;
+}
+
+static int cluster_disable_recover(const struct sd_req *req,
+				   struct sd_rsp *rsp, void *data)
+{
+	sys->disable_recovery = 1;
+	return SD_RES_SUCCESS;
+}
+
 static int cluster_get_vdi_attr(struct request *req)
 {
 	const struct sd_req *hdr = &req->rq;
@@ -1026,6 +1040,18 @@ static struct sd_op_template sd_ops[] = {
 		.type = SD_OP_TYPE_PEER,
 		.process_work = peer_remove_obj,
 	},
+
+	[SD_OP_ENABLE_RECOVER] = {
+		.name = "ENABLE_RECOVER",
+		.type = SD_OP_TYPE_CLUSTER,
+		.process_main = cluster_enable_recover,
+	},
+
+	[SD_OP_DISABLE_RECOVER] = {
+		.name = "DISABLE_RECOVER",
+		.type = SD_OP_TYPE_CLUSTER,
+		.process_main = cluster_disable_recover,
+	},
 };
 
 struct sd_op_template *get_sd_op(uint8_t opcode)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index c4225ea..998c846 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -118,6 +118,7 @@ struct cluster_info {
 
 	int use_directio;
 	uint8_t gateway_only;
+	uint8_t disable_recovery;
 
 	struct work_queue *gateway_wqueue;
 	struct work_queue *io_wqueue;
-- 
1.7.11.2




More information about the sheepdog mailing list