[Sheepdog] [PATCH v3 1/7] sheep: add transient failure detection

Wed May 2 09:12:49 CEST 2012

From: HaiTing Yao <wujue.yht at taobao.com>

Sometimes a node leaves the cluster but is expected to come back after a short while.

When we need this:

1, the sheepdog daemon is restarted for an upgrade or some other purpose

2, the corosync driver loses its token for a short while

How to implement this:

Treat the node's departure as a transient failure and mark the node's status
as failed. Wait a short while for the node to come back. If the time is up
and the node has not come back, change the transient failure to a
permanent one.

This patch contains the header file changes.

Signed-off-by: HaiTing Yao <wujue.yht at taobao.com>
---
 include/sheep.h    |   14 ++++++++++++++
 sheep/cluster.h    |    8 ++++++++
 sheep/sheep_priv.h |   10 ++++++++++
 3 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/include/sheep.h b/include/sheep.h
index 7e287c4..b5a5d5d 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -66,6 +66,10 @@
 #define SD_RES_NOT_FORMATTED 0x43 /* Sheepdog is not formatted yet */
 #define SD_RES_INVALID_CTIME 0x44 /* Creation time of sheepdog is different */
 #define SD_RES_INVALID_EPOCH 0x45 /* Invalid epoch */
+/* node came back again after a temporary failure */
+#define SD_RES_NODE_COME_BACK 0x46
+/* cluster has temporarily failed node */
+#define SD_RES_CLUSTER_TEMP_FAILURE 0x47
 
 #define SD_FLAG_NOHALT       0x0004 /* Serve the IO rquest even lack of nodes */
 
@@ -145,11 +149,17 @@ struct sd_node_rsp {
 	uint64_t	store_free;
 };
 
+enum node_vnode_status {
+	NODE_STATUS_NORMAL,
+	NODE_STATUS_FAIL,
+};
+
 struct sd_node {
 	uint8_t         addr[16];
 	uint16_t        port;
 	uint16_t	nr_vnodes;
 	uint32_t	zone;
+	uint32_t	status;
 };
 
 struct sd_vnode {
@@ -158,6 +168,7 @@ struct sd_vnode {
 	uint16_t        port;
 	uint16_t	node_idx;
 	uint32_t	zone;
+	uint32_t	status;
 };
 
 struct epoch_log {
@@ -295,6 +306,7 @@ static inline const char *sd_strerror(int err)
 		{SD_RES_NOT_FORMATTED, "Cluster has not been formatted"},
 		{SD_RES_INVALID_CTIME, "Creation times differ"},
 		{SD_RES_INVALID_EPOCH, "Invalid epoch"},
+		{SD_RES_CLUSTER_TEMP_FAILURE, "Nodes occur temporary failure"},
 	};
 
 	for (i = 0; i < ARRAY_SIZE(errors); ++i)
@@ -368,4 +380,6 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
 	return nr_vnodes;
 }
 
+int temp_failure_enabled(void);
+
 #endif
diff --git a/sheep/cluster.h b/sheep/cluster.h
index d543e99..12d141f 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -30,6 +30,7 @@ enum cluster_join_result {
 	CJ_RES_MASTER_TRANSFER, /* Transfer mastership.  The joining
 				 * node has a newer epoch, so this node
 				 * will leave the cluster (restart later). */
+	CJ_RES_COME_BACK, /* node come back after reboot */
 };
 
 struct cluster_driver {
@@ -99,6 +100,13 @@ struct cluster_driver {
 	 */
 	int (*dispatch)(void);
 
+	/*
+	 * remove one useless node
+	 *
+	 * Returns zero on success, -1 on error
+	 */
+	int (*remove_node)(void *node);
+
 	struct list_head list;
 };
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index fd36ea4..83d2a9b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -49,6 +49,14 @@ struct event_struct {
 	struct list_head event_list;
 };
 
+#define SD_MAX_NODES_TEMP_FAIL (SD_MAX_NODES - 1)
+
+struct temp_failure {
+	int busy;
+	struct sd_node node;
+	struct timer leave_timer;
+};
+
 struct client_info {
 	struct connection conn;
 
@@ -150,6 +158,8 @@ struct cluster_info {
 	int use_directio;
 	uint8_t async_flush;
 
+	uint32_t templeft_time;
+
 	struct work_queue *event_wqueue;
 	struct work_queue *gateway_wqueue;
 	struct work_queue *io_wqueue;
-- 
1.7.1