[sheepdog] [PATCH 2/2] sheep: factor node list updates

Thu May 17 10:15:16 CEST 2012

Add a new helper that updates sys->nodes and sys->nr_nodes as well as the
vnode list, and use it in all places that update cluster membership.

In __sd_leave_done it can be used as is and gives a nice cleanup.  In the
master transfer case in sd_join_handler it can also be used as is, but I've
added an assert for the previously implicit assumption that no other nodes
can exist.  The tricky case is update_cluster_info/finish_join, where
we first need to write an entry into the epoch log for the epoch before
the join, so that the recovery code can do the right thing.

Signed-off-by: Christoph Hellwig <hch at lst.de>
---
 sheep/group.c |   73 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 41 insertions(+), 32 deletions(-)

Index: sheepdog/sheep/group.c
===================================================================

--- sheepdog.orig/sheep/group.c	2012-05-17 09:59:20.719984516 +0200
+++ sheepdog/sheep/group.c	2012-05-17 09:59:58.763984155 +0200
@@ -585,22 +585,50 @@ out:
 	return ret;
 }
 
+static void update_node_info(struct sd_node *nodes, size_t nr_nodes)
+{
+	print_node_list(nodes, nr_nodes);
+	/* overwrite the member list with the caller's copy and sort it */
+	sys->nr_nodes = nr_nodes;
+	memcpy(sys->nodes, nodes, sizeof(*sys->nodes) * sys->nr_nodes);
+	qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
+	/* refresh the vnode list now that sys->nodes has changed */
+	update_vnode_info();
+}
+
+static void log_last_epoch(struct join_message *msg, struct sd_node *joined,
+		struct sd_node *nodes, size_t nr_nodes)
+{
+	if ((msg->cluster_status == SD_STATUS_OK ||
+	     msg->cluster_status == SD_STATUS_HALT) && msg->inc_epoch) {
+		struct sd_node old_nodes[SD_MAX_NODES];
+		size_t count = 0, i;
+
+		/* previous epoch = every member except the joining node */
+		for (i = 0; i < nr_nodes; i++) {
+			if (!node_eq(nodes + i, joined))
+				old_nodes[count++] = nodes[i];
+		}
+		qsort(old_nodes, count, sizeof(struct sd_node), node_cmp);
+
+		update_epoch_log(sys->epoch, old_nodes, count);
+	}
+}
+
 static void finish_join(struct join_message *msg, struct sd_node *joined,
 		struct sd_node *nodes, size_t nr_nodes)
 {
 	int i;
 
+	sys->join_finished = 1;
 	sys->nr_copies = msg->nr_copies;
 	sys->epoch = msg->epoch;
 
-	/* add nodes execept for newly joined one */
-	for (i = 0; i < nr_nodes; i++) {
-		if (node_eq(nodes + i, joined))
-			continue;
-
-		sys->nodes[sys->nr_nodes++] = nodes[i];
-	}
-	qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
+	/*
+	 * Make sure we have an epoch log record for the epoch before
+	 * this node joins, as recovery expects this record to exist.
+	 */
+	log_last_epoch(msg, joined, nodes, nr_nodes);
 
 	if (msg->cluster_status != SD_STATUS_OK) {
 		int nr_leave_nodes;
@@ -624,12 +652,6 @@ static void finish_join(struct join_mess
 		}
 	}
 
-	sys->join_finished = 1;
-
-	if ((msg->cluster_status == SD_STATUS_OK ||
-	     msg->cluster_status == SD_STATUS_HALT) && msg->inc_epoch)
-		update_epoch_log(sys->epoch, sys->nodes, sys->nr_nodes);
-
 	if (!sd_store && strlen((char *)msg->store)) {
 		sd_store = find_store_driver((char *)msg->store);
 		if (sd_store) {
@@ -653,8 +675,7 @@ static void update_cluster_info(struct j
 	if (!sys->join_finished)
 		finish_join(msg, joined, nodes, nr_nodes);
 
-	sys->nodes[sys->nr_nodes++] = *joined;
-	qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
+	update_node_info(nodes, nr_nodes);
 
 	if (msg->cluster_status == SD_STATUS_OK ||
 	    msg->cluster_status == SD_STATUS_HALT) {
@@ -670,10 +691,7 @@ static void update_cluster_info(struct j
 			set_cluster_ctime(msg->ctime);
 		}
 	}
-	update_vnode_info();
 	sys_stat_set(msg->cluster_status);
-
-	print_node_list(sys->nodes, sys->nr_nodes);
 }
 
 static void __sd_notify(struct event_struct *cevent)
@@ -932,21 +950,15 @@ static void __sd_leave_done(struct event
 {
 	struct work_leave *w = container_of(cevent, struct work_leave, cev);
 
-	sys->nr_nodes = w->member_list_entries;
-	memcpy(sys->nodes, w->member_list, sizeof(*sys->nodes) * sys->nr_nodes);
-	qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
+	update_node_info(w->member_list, w->member_list_entries);
 
 	if (sys_can_recover()) {
 		sys->epoch++;
 		update_epoch_store(sys->epoch);
 		update_epoch_log(sys->epoch, sys->nodes, sys->nr_nodes);
-	}
-	update_vnode_info();
 
-	print_node_list(sys->nodes, sys->nr_nodes);
-
-	if (sys_can_recover())
 		start_recovery(sys->epoch);
+	}
 
 	if (sys_can_halt()) {
 		if (current_vnode_info->nr_zones < sys->nr_copies)
@@ -1277,11 +1289,8 @@ void sd_join_handler(struct sd_node *joi
 		 */
 		if (!sys->join_finished) {
 			sys->join_finished = 1;
-			sys->nodes[sys->nr_nodes++] = sys->this_node;
-			qsort(sys->nodes, sys->nr_nodes, sizeof(*sys->nodes), node_cmp);
-			sys->epoch = get_latest_epoch();
-
-			update_vnode_info();
+			assert(sys->nr_nodes == 0);
+			update_node_info(&sys->this_node, 1);
 		}
 
 		nr_local = get_nodes_nr_epoch(sys->epoch);