[sheepdog] [PATCH v4] corosync: fix cluster hang by cluster requests blocking confchg

Liu Yuan namei.unix at gmail.com
Thu Jul 5 18:46:52 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

v4: use separate lists for confchg and notify event
--------------------------------------------------------- >8

This hang is caused by a cluster request (e.g. adding a new vdi):

1) a cluster request blocks the cluster and waits for its worker to finish.
2) a confchg happens, but is queued after this cluster request.
3) cluster_request_fn() issues a write request, which always fails because of
   one node failure, and retries forever.
4) cluster_request_done() is never called, so we can't unblock the event list.

The fix is to use separate lists for notify and confchg events.

This can be reproduced reliably with the following script:
================

for i in `seq 0 7`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
sleep 1
collie/collie cluster format  -c 3
echo create new vdis
(
for i in `seq 0 40`;do
collie/collie vdi create test$i 4M
done
) &

echo kill nodes
sleep 1
for i in 1 2 3 4 5; do pkill -f "sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i";sleep 1;done;

for i in `seq 1 5`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done

echo wait for object recovery to finish
for ((;;)); do
        if [ "$(pgrep collie)" ]; then
                sleep 1
        else
                break
        fi
done
=================

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/cluster/corosync.c |   64 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 330cb71..aec1c5c 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -31,7 +31,8 @@ static struct cpg_name cpg_group = { 8, "sheepdog" };
 static corosync_cfg_handle_t cfg_handle;
 static struct cpg_node this_node;
 
-static LIST_HEAD(corosync_event_list);
+static LIST_HEAD(corosync_notify_list);
+static LIST_HEAD(corosync_confchg_list);
 
 static struct cpg_node cpg_nodes[SD_MAX_NODES];
 static size_t nr_cpg_nodes;
@@ -198,12 +199,12 @@ retry:
 	return 0;
 }
 
-static struct corosync_event *find_event(enum corosync_event_type type,
-		struct cpg_node *sender)
+static inline struct corosync_event *
+find_notify_event(enum corosync_event_type type, struct cpg_node *sender)
 {
 	struct corosync_event *cevent;
 
-	list_for_each_entry(cevent, &corosync_event_list, list) {
+	list_for_each_entry(cevent, &corosync_notify_list, list) {
 		if (cevent->type == type &&
 		    cpg_node_equal(&cevent->sender, sender))
 			return cevent;
@@ -212,6 +213,38 @@ static struct corosync_event *find_event(enum corosync_event_type type,
 	return NULL;
 }
 
+static inline struct corosync_event *
+find_confchg_event(enum corosync_event_type type, struct cpg_node *sender)
+{
+	struct corosync_event *cevent;
+
+	list_for_each_entry(cevent, &corosync_confchg_list, list) {
+		if (cevent->type == type &&
+		    cpg_node_equal(&cevent->sender, sender))
+			return cevent;
+	}
+
+	return NULL;
+}
+
+static inline bool event_is_confchg(enum corosync_event_type type)
+{
+	if (type == COROSYNC_EVENT_TYPE_BLOCK ||
+	    type == COROSYNC_EVENT_TYPE_NOTIFY) /* event enum, not MSG_TYPE */
+		return false; /* block/notify events are not confchg events */
+	/* every other event type (e.g. join request, leave) is a confchg */
+	return true;
+}
+
+static inline struct corosync_event *
+find_event(enum corosync_event_type type, struct cpg_node *sender)
+{
+	if (event_is_confchg(type))
+		return find_confchg_event(type, sender);
+	else
+		return find_notify_event(type, sender);
+}
+
 static int is_master(struct cpg_node *node)
 {
 	int i;
@@ -326,8 +359,14 @@ static void __corosync_dispatch(void)
 {
 	struct corosync_event *cevent;
 
-	while (!list_empty(&corosync_event_list)) {
-		cevent = list_first_entry(&corosync_event_list, typeof(*cevent), list);
+	while (!list_empty(&corosync_notify_list) ||
+	       !list_empty(&corosync_confchg_list)) {
+		if (!list_empty(&corosync_confchg_list))
+			cevent = list_first_entry(&corosync_confchg_list,
+						  typeof(*cevent), list);
+		else
+			cevent = list_first_entry(&corosync_notify_list,
+						  typeof(*cevent), list);
 
 		/* update join status */
 		if (!join_finished) {
@@ -372,8 +411,9 @@ static void __corosync_dispatch(void)
 	}
 }
 
-static struct corosync_event *update_event(enum corosync_event_type type,
-		struct cpg_node *sender, void *msg, size_t msg_len)
+static struct corosync_event *
+update_event(enum corosync_event_type type, struct cpg_node *sender, void *msg,
+	     size_t msg_len)
 {
 	struct corosync_event *cevent;
 
@@ -439,7 +479,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
 			cevent->msg = NULL;
 
 
-		list_add_tail(&cevent->list, &corosync_event_list);
+		list_add_tail(&cevent->list, &corosync_notify_list);
 		break;
 	case COROSYNC_MSG_TYPE_LEAVE:
 		cevent = zalloc(sizeof(*cevent));
@@ -462,7 +502,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
 		} else
 			cevent->msg = NULL;
 
-		list_add_tail(&cevent->list, &corosync_event_list);
+		list_add_tail(&cevent->list, &corosync_confchg_list);
 		break;
 	case COROSYNC_MSG_TYPE_JOIN_RESPONSE:
 		cevent = update_event(COROSYNC_EVENT_TYPE_JOIN_REQUEST,
@@ -561,7 +601,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 		cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
 		cevent->sender = left_sheep[i];
 
-		list_add_tail(&cevent->list, &corosync_event_list);
+		list_add_tail(&cevent->list, &corosync_confchg_list);
 	}
 
 	/* dispatch join_handler */
@@ -572,7 +612,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 
 		cevent->type = COROSYNC_EVENT_TYPE_JOIN_REQUEST;
 		cevent->sender = joined_sheep[i];
-		list_add_tail(&cevent->list, &corosync_event_list);
+		list_add_tail(&cevent->list, &corosync_confchg_list);
 	}
 
 	if (!join_finished) {
-- 
1.7.10.2




More information about the sheepdog mailing list