[sheepdog] [PATCH RFC 02/11] sheep: share joining nodes with newly added sheep

Yunkai Zhang yunkai.me at gmail.com
Wed Aug 8 23:14:14 CEST 2012


From: Yunkai Zhang <qiushu.zyk at taobao.com>

The master sheep fills in the joining nodes in join_message when a sheep joins.

A newly added sheep gets the joining nodes from join_message and uses them to
initialize and recalculate the all_nodes array, which will be used to generate
current_vnode_info in cluster_enable_recover().

Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
---
 include/internal_proto.h | 10 ++++++----
 sheep/group.c            | 39 +++++++++++++++++++++++++++++++--------
 sheep/ops.c              |  1 +
 3 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index 83d98f1..717fb79 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -194,6 +194,7 @@ struct join_message {
 	uint16_t nr_nodes;
 	uint16_t nr_failed_nodes;
 	uint16_t nr_delayed_nodes;
+	uint16_t nr_joining_nodes;
 	uint16_t cluster_flags;
 	uint32_t cluster_status;
 	uint32_t epoch;
@@ -204,10 +205,11 @@ struct join_message {
 
 	/*
 	 * A joining sheep puts the local node list here, which is nr_nodes
-	 * entries long.  After the master replies it will contain the list of
-	 * nodes that attempted to join but failed the join process.  The
-	 * number of entries in that case is nr_failed_nodes, which by
-	 * defintion must be smaller than nr_nodes.
+	 * entries long. After the master replies it will contain the list of
+	 * nodes in the following order:
+	 * [ failed  nodes ]: size = nr_failed_nodes
+	 * [ delayed nodes ]: size = nr_delayed_nodes
+	 * [ joining nodes ]: size = nr_joining_nodes
 	 */
 	struct sd_node nodes[];
 };
diff --git a/sheep/group.c b/sheep/group.c
index cb244f7..ad3447d 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -767,16 +767,32 @@ static void get_vdi_bitmap(struct sd_node *nodes, size_t nr_nodes)
 	queue_work(sys->block_wqueue, &w->work);
 }
 
-static void prepare_recovery(struct sd_node *joined,
-				    struct sd_node *nodes, size_t nr_nodes)
+static void prepare_recovery(struct join_message *jm,
+				  struct sd_node *joined,
+				  struct sd_node *nodes, size_t nr_nodes)
 {
-	int i;
+	int i, j, n, found;
 
 	joining_nodes[nr_joining_nodes++] = *joined;
+
 	if (!nr_all_nodes) {
-		/* exclude the newly added one */
-		for (i = 0; i < nr_nodes; i++) {
-			if (!node_eq(nodes + i, joined))
+		/* initialize joining_nodes */
+		n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+		memcpy(&joining_nodes[nr_joining_nodes], &jm->nodes[n],
+		       jm->nr_joining_nodes * sizeof(*joining_nodes));
+		nr_joining_nodes += jm->nr_joining_nodes;
+
+		/* initialize all_nodes */
+		for (found = 0, i = 0; i < nr_nodes; i++) {
+			/* exclude all joining nodes */
+			for (j = 0; j < nr_joining_nodes; j++) {
+				if (node_eq(nodes + i, joining_nodes + j)) {
+					found = 1;
+					break;
+				}
+			}
+
+			if (!found)
 				all_nodes[nr_all_nodes++] = nodes[i];
 		}
 	}
@@ -866,7 +882,7 @@ static void update_cluster_info(struct join_message *msg,
 				start_recovery(current_vnode_info,
 					       old_vnode_info);
 			} else
-				prepare_recovery(joined, nodes, nr_nodes);
+				prepare_recovery(msg, joined, nodes, nr_nodes);
 		}
 
 		if (have_enough_zones())
@@ -1013,7 +1029,14 @@ enum cluster_join_result sd_check_join_cb(struct sd_node *joining,
 	    (ret == CJ_RES_SUCCESS || ret == CJ_RES_JOIN_LATER))
 		format_exceptional_node_list(jm);
 
-	n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+	if (sys->disable_recovery) {
+		n = jm->nr_failed_nodes + jm->nr_delayed_nodes;
+		memcpy(&jm->nodes[n], joining_nodes,
+		       nr_joining_nodes * sizeof(*joining_nodes));
+		jm->nr_joining_nodes = nr_joining_nodes;
+	}
+
+	n = jm->nr_failed_nodes + jm->nr_delayed_nodes + jm->nr_joining_nodes;
 	*opaque_len = sizeof(*jm) + n * sizeof(jm->nodes[0]);
 	return ret;
 }
diff --git a/sheep/ops.c b/sheep/ops.c
index 0cddf66..b2e9c69 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -292,6 +292,7 @@ static int cluster_enable_recover(const struct sd_req *req,
 		put_vnode_info(old_vnode_info);
 	}
 
+	nr_all_nodes = 0;
 	nr_joining_nodes = 0;
 	sys->disable_recovery = 0;
 	return SD_RES_SUCCESS;
-- 
1.7.11.2




More information about the sheepdog mailing list