[Sheepdog] [PATCH 3/3] sheep: add SD_OP_RECOVER operation
Liu Yuan
namei.unix at gmail.com
Sat Oct 22 07:32:44 CEST 2011
From: Liu Yuan <tailai.ly at taobao.com>
With this patch, manual recovery command starts working.
[Test]
script1:
for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format -H
collie/collie cluster shutdown;
sleep 1
# node 1 2 permanently down
for i in 0; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 0; do ./collie/collie cluster info -p 700$i; done
collie/collie cluster recover
for i in 0; do ./collie/collie cluster info -p 700$i; done
for i in 3 4; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 0 3 4; do ./collie/collie cluster info -p 700$i; done
output:
root at taobao:/home/dev/sheepdog# ./test2.sh
Cluster status: Waiting for other nodes joining
Creation time Epoch Nodes
Cluster status: running
Creation time Epoch Nodes
2011-10-22 02:18:49 2 [192.168.0.1:7000]
2011-10-22 02:18:49 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
Cluster status: running
Creation time Epoch Nodes
2011-10-22 02:18:49 4 [192.168.0.1:7000, 192.168.0.1:7003, 192.168.0.1:7004]
2011-10-22 02:18:49 3 [192.168.0.1:7000, 192.168.0.1:7003]
2011-10-22 02:18:49 2 [192.168.0.1:7000]
2011-10-22 02:18:49 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
....
script2:
for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format
for i in 0 1 2; do pkill -f "sheep -d /store/$i"; sleep 1; done
# master node 2 permanently down
for i in 0 1; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 1; do ./collie/collie cluster info -p 700$i; done
collie/collie cluster recover -p 7001
for i in 1; do ./collie/collie cluster info -p 700$i; done
for i in 0 3; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done
for i in 0 1 3; do ./collie/collie cluster info -p 700$i; done
output:
Cluster status: Waiting for other nodes joining
Creation time Epoch Nodes
Cluster status: The sheepdog is stopped doing IO, short of living nodes
Creation time Epoch Nodes
Cluster status: running
Creation time Epoch Nodes
2011-10-22 01:59:36 5 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7003]
2011-10-22 01:59:36 4 [192.168.0.1:7000, 192.168.0.1:7001]
2011-10-22 01:59:36 3 [192.168.0.1:7001]
2011-10-22 01:59:36 2 [192.168.0.1:7001, 192.168.0.1:7002]
2011-10-22 01:59:36 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002]
...
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
include/sheep.h | 1 +
include/sheepdog_proto.h | 1 +
sheep/ops.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+), 0 deletions(-)
diff --git a/include/sheep.h b/include/sheep.h
index 46ecf96..072ea7a 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -261,6 +261,7 @@ static inline const char *sd_strerror(int err)
{SD_RES_WAIT_FOR_JOIN, "Waiting for other nodes joining"},
{SD_RES_JOIN_FAILED, "The node had failed to join sheepdog"},
{SD_RES_HALT, "The sheepdog is stopped doing IO, short of living nodes"},
+ {SD_RES_MANUAL_RECOVER, "We should not manually recover the running/halted cluster"},
{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 976a5f4..65aeef3 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -59,6 +59,7 @@
#define SD_RES_WAIT_FOR_JOIN 0x17 /* Sheepdog is waiting for other nodes joining */
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
#define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
+#define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover this cluster */
/*
* Object ID rules
diff --git a/sheep/ops.c b/sheep/ops.c
index 0d38e7b..bdabe2c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -301,6 +301,55 @@ static int process_get_epoch(const struct sd_req *req, struct sd_rsp *rsp,
return ret;
}
+static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp,
+				  void *data)
+{
+	int s, nr_zones = 0, ret = SD_RES_SUCCESS;
+	uint8_t c;
+	uint16_t f;
+
+	/* We should manually recover the cluster when
+	 * 1) the master is physically down (different epoch condition).
+	 * 2) some nodes are physically down (same epoch condition).
+	 * In both cases, the node(s) state is WAIT_FOR_JOIN.
+	 */
+	if (!sys_stat_wait_join()) {
+		ret = SD_RES_MANUAL_RECOVER;
+		goto out;
+	}
+
+	ret = get_cluster_copies(&c);
+	if (ret)
+		goto out;
+	ret = get_cluster_flags(&f);
+	if (ret)
+		goto out;
+
+	sys->nr_sobjs = c;
+	sys->flags = f;
+
+	s = SD_STATUS_OK;
+	if (!sys_flag_nohalt()) {
+		nr_zones = get_zones_nr_from(sys->nodes, sys->nr_nodes);
+		if (nr_zones < sys->nr_sobjs)
+			s = SD_STATUS_HALT;
+	}
+
+	dprintf("flags %d, nr_zones %d, copies %d\n", sys->flags, nr_zones, sys->nr_sobjs);
+
+	sys->epoch++; /* some nodes are left, so we get a new epoch */
+	ret = update_epoch_log(sys->epoch);
+	if (ret) {
+		ret = SD_RES_EIO;
+		sys->epoch--;
+		goto out;
+	}
+	update_epoch_store(sys->epoch);
+	sys_stat_set(s);
+out:
+	return ret;
+}
+
static struct sd_op_template sd_ops[] = {
/* cluster operations */
@@ -345,6 +394,12 @@ static struct sd_op_template sd_ops[] = {
.type = SD_OP_TYPE_CLUSTER,
},
+ [SD_OP_RECOVER] = {
+ .type = SD_OP_TYPE_CLUSTER,
+ .available_always = 1,
+ .post_process = cluster_manual_recover,
+ },
+
/* store operations */
[SD_OP_READ_VDIS] = {
.type = SD_OP_TYPE_STORE,
--
1.7.6.1
More information about the sheepdog
mailing list