[Sheepdog] [PATCH 1/2] sheep: introduce SD_STATUS_HALT
Liu Yuan
namei.unix at gmail.com
Tue Oct 11 11:06:16 CEST 2011
From: Liu Yuan <tailai.ly at taobao.com>
Currently, sheepdog will serve IO requests even if the number of nodes is less than 'copies'.
When the number of the nodes (or zones) is less than the copies specified by
collie-cluster-format command, the sheepdog cluster should stop serving IO requests.
This is necessary to handle the subtle case below:
+ good nodes, - failed nodes.
     0          1          2          3
     +          -          -          +
     +   -->    -   -->    -   -->    +
     +          +          -          # <-- permanently down.
                ^
                |
                this node has the latest data
At stage 3, we would have a cluster recovered without the data tracked at stage 1.
When the nodes are in the SD_STATUS_HALT state, sheepdog can still serve configuration changes
and do the recovery job.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
include/sheep.h | 1 +
include/sheepdog_proto.h | 1 +
sheep/group.c | 27 ++++++++++++++++++++++-----
sheep/sheep_priv.h | 1 +
4 files changed, 25 insertions(+), 5 deletions(-)
diff --git a/include/sheep.h b/include/sheep.h
index 31516d9..943cdf7 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -254,6 +254,7 @@ static inline const char *sd_strerror(int err)
{SD_RES_WAIT_FOR_FORMAT, "Waiting for a format operation"},
{SD_RES_WAIT_FOR_JOIN, "Waiting for other nodes joining"},
{SD_RES_JOIN_FAILED, "The node had failed to join sheepdog"},
+ {SD_RES_HALT, "The node is stopped doing IO, short of living nodes"},
{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 2b042f4..a5a41d0 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -58,6 +58,7 @@
#define SD_RES_WAIT_FOR_FORMAT 0x16 /* Sheepdog is waiting for a format operation */
#define SD_RES_WAIT_FOR_JOIN 0x17 /* Sheepdog is waiting for other nodes joining */
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
+#define SD_RES_HALT 0x19 /* Target node is stopped doing IO */
/*
* Object ID rules
diff --git a/sheep/group.c b/sheep/group.c
index f6743f5..59293b2 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -335,6 +335,9 @@ void cluster_queue_request(struct work *work, int idx)
case SD_STATUS_JOIN_FAILED:
ret = SD_RES_JOIN_FAILED;
break;
+ case SD_STATUS_HALT:
+ ret = SD_RES_HALT;
+ break;
default:
ret = SD_RES_SYSTEM_ERROR;
break;
@@ -639,6 +642,10 @@ static int get_cluster_status(struct sheepdog_node_list_entry *from,
break;
case SD_STATUS_SHUTDOWN:
return SD_RES_SHUTDOWN;
+ case SD_STATUS_HALT:
+ if (inc_epoch) /* only report when the caller passed a pointer */
+ *inc_epoch = 1;
+ break;
default:
break;
}
@@ -810,12 +817,13 @@ static void update_cluster_info(struct join_message *msg)
sheepid_to_str(&msg->nodes[i].sheepid));
}
- if (msg->cluster_status != SD_STATUS_OK)
+ if (msg->cluster_status == SD_STATUS_WAIT_FOR_JOIN)
add_node_to_leave_list((struct message_header *)msg);
sys->join_finished = 1;
- if (msg->cluster_status == SD_STATUS_OK && msg->inc_epoch)
+ if ((msg->cluster_status == SD_STATUS_OK || msg->cluster_status == SD_STATUS_HALT)
+ && msg->inc_epoch)
update_epoch_log(sys->epoch);
join_finished:
@@ -840,6 +848,12 @@ join_finished:
}
}
+ if (msg->cluster_status == SD_STATUS_HALT && msg->inc_epoch) {
+ sys->epoch++;
+ update_epoch_log(sys->epoch);
+ update_epoch_store(sys->epoch);
+ }
+
print_node_list(&sys->sd_node_list);
sys->status = msg->cluster_status;
@@ -1077,7 +1091,8 @@ static void send_join_response(struct work_notify *w)
m->state = DM_FIN;
dprintf("%d, %d\n", jm->result, jm->cluster_status);
- if (jm->result == SD_RES_SUCCESS && jm->cluster_status != SD_STATUS_OK) {
+ if (jm->result == SD_RES_SUCCESS &&
+ jm->cluster_status == SD_STATUS_WAIT_FOR_JOIN) {
jm->nr_leave_nodes = 0;
list_for_each_entry(node, &sys->leave_list, list) {
jm->leave_nodes[jm->nr_leave_nodes].sheepid = node->sheepid;
@@ -1181,7 +1196,8 @@ static void __sd_notify_done(struct cpg_event *cevent)
}
}
- if (do_recovery && sys->status == SD_STATUS_OK) {
+ if (do_recovery &&
+ (sys->status == SD_STATUS_OK || sys->status == SD_STATUS_HALT)) {
list_for_each_entry_safe(node, t, &sys->leave_list, list) {
list_del(&node->list);
}
@@ -1423,7 +1439,8 @@ static void __sd_leave_done(struct cpg_event *cevent)
print_node_list(&sys->sd_node_list);
- if (node_left && sys->status == SD_STATUS_OK)
+ if (node_left &&
+ (sys->status == SD_STATUS_OK || sys->status == SD_STATUS_HALT))
start_recovery(sys->epoch);
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index e2fcb40..355cd93 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -37,6 +37,7 @@
#define SD_STATUS_WAIT_FOR_JOIN 0x02
#define SD_STATUS_SHUTDOWN 0x03
#define SD_STATUS_JOIN_FAILED 0x04
+#define SD_STATUS_HALT 0x05
#define SD_RES_NETWORK_ERROR 0x81 /* Network error between sheeps */
--
1.7.6.1
More information about the sheepdog
mailing list