[sheepdog] [Sheepdog] [PATCH S003 v2] sheep: handle master crashing before sending JOIN request

Christoph Hellwig hch at infradead.org
Wed May 16 15:20:41 CEST 2012


On Fri, May 11, 2012 at 10:32:07AM +0800, Liu Yuan wrote:
>   I have run script/checkpatch.pl against your patch, and get 3
> warnings, please fix it.

I really don't like patches getting lost because of trivial issues, so
here's a version with the checkpatch warnings fixed and trivial
commit message updates.  Please make sure to properly attribute it
to Shevek.

-------------- next part --------------
From: Shevek <shevek at anarres.org>
Subject: sheep: handle master crashing before sending join request

This patch fixes one of the cases described by Huxinwei and Liu Yuan where
sheepdog fails to elect a master. A longer description is in the patch.
    
A problem arises if a node joins the cluster and generates a confchg event,
then crashes or leaves without sending a join request and receiving a join
response. The second node to join never becomes master, and the entire cluster
hangs.
    
This patch allows a node to detect whether it should promote itself to master
after an arbitrary confchg event. Every node except the master creates a
blocked JOIN event for every node that joined after itself; therefore, the
master is the node which has a JOIN event for every node in the members list.
    
Signed-off-by: Shevek <shevek at anarres.org>

---
 sheep/cluster/corosync.c |   57 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 15 deletions(-)

Index: sheepdog/sheep/cluster/corosync.c
===================================================================
--- sheepdog.orig/sheep/cluster/corosync.c	2012-05-16 15:06:57.359671131 +0200
+++ sheepdog/sheep/cluster/corosync.c	2012-05-16 15:09:37.583669605 +0200
@@ -36,6 +36,8 @@ static LIST_HEAD(corosync_block_list);
 
 static struct cpg_node cpg_nodes[SD_MAX_NODES];
 static size_t nr_cpg_nodes;
+static int self_elect;
+static int join_finished;
 
 /* event types which are dispatched in corosync_dispatch() */
 enum corosync_event_type {
@@ -67,7 +69,6 @@ struct corosync_event {
 
 	int blocked;
 	int callbacked;
-	int first_node;
 
 	struct list_head list;
 };
@@ -373,7 +374,6 @@ static int __corosync_dispatch_one(struc
 static void __corosync_dispatch(void)
 {
 	struct corosync_event *cevent;
-	static int join_finished;
 	int done;
 
 	while (!list_empty(&corosync_event_list)) {
@@ -381,7 +381,7 @@ static void __corosync_dispatch(void)
 
 		/* update join status */
 		if (!join_finished && cevent->type == COROSYNC_EVENT_TYPE_JOIN) {
-			if (cevent->first_node) {
+			if (cevent->blocked && self_elect) {
 				join_finished = 1;
 				nr_cpg_nodes = 0;
 			}
@@ -528,6 +528,17 @@ static void cdrv_cpg_deliver(cpg_handle_
 	__corosync_dispatch();
 }
 
+static void build_cpg_node_list(struct cpg_node *nodes,
+		const struct cpg_address *list, size_t nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		nodes[i].nodeid = list[i].nodeid;
+		nodes[i].pid = list[i].pid;
+	}
+}
+
 static void cdrv_cpg_confchg(cpg_handle_t handle,
 			     const struct cpg_name *group_name,
 			     const struct cpg_address *member_list,
@@ -539,22 +550,19 @@ static void cdrv_cpg_confchg(cpg_handle_
 {
 	struct corosync_event *cevent;
 	int i;
+	struct cpg_node member_sheep[SD_MAX_NODES];
 	struct cpg_node joined_sheep[SD_MAX_NODES];
 	struct cpg_node left_sheep[SD_MAX_NODES];
+	int promote = 1;
 
 	dprintf("mem:%zu, joined:%zu, left:%zu\n",
 		member_list_entries, joined_list_entries,
 		left_list_entries);
 
 	/* convert cpg_address to cpg_node */
-	for (i = 0; i < left_list_entries; i++) {
-		left_sheep[i].nodeid = left_list[i].nodeid;
-		left_sheep[i].pid = left_list[i].pid;
-	}
-	for (i = 0; i < joined_list_entries; i++) {
-		joined_sheep[i].nodeid = joined_list[i].nodeid;
-		joined_sheep[i].pid = joined_list[i].pid;
-	}
+	build_cpg_node_list(member_sheep, member_list, member_list_entries);
+	build_cpg_node_list(left_sheep, left_list, left_list_entries);
+	build_cpg_node_list(joined_sheep, joined_list, joined_list_entries);
 
 	/* dispatch leave_handler */
 	for (i = 0; i < left_list_entries; i++) {
@@ -604,13 +612,32 @@ static void cdrv_cpg_confchg(cpg_handle_
 		cevent->type = COROSYNC_EVENT_TYPE_JOIN;
 		cevent->sender = joined_sheep[i];
 		cevent->blocked = 1; /* FIXME: add explanation */
-		if (member_list_entries == joined_list_entries - left_list_entries &&
-		    cpg_node_equal(&joined_sheep[0], &this_node))
-			cevent->first_node = 1;
-
 		list_add_tail(&cevent->list, &corosync_event_list);
 	}
 
+	if (!join_finished) {
+		/*
+		 * Exactly one non-master member has seen join events for
+		 * all other members, because events are ordered.
+		 */
+		for (i = 0; i < member_list_entries; i++) {
+			cevent = find_block_event(COROSYNC_EVENT_TYPE_JOIN,
+					&member_sheep[i]);
+			if (!cevent) {
+				dprintf("Not promoting because member is "
+					"not in our event list.\n");
+				promote = 0;
+				break;
+			}
+		}
+
+		/*
+		 * If we see the join events for all nodes promote ourself to
+		 * master right here.
+		 */
+		if (promote)
+			self_elect = 1;
+	}
 	__corosync_dispatch();
 }
 


More information about the sheepdog mailing list