[sheepdog] [PATCH] corosync: fix cluster hang by cluster requests blocking confchg

Liu Yuan namei.unix at gmail.com
Thu Jul 5 11:24:47 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

This hang is caused by a cluster request (e.g. adding a new vdi):

1) a cluster request blocks the cluster and waits for its worker to finish.
2) a confchg happens, but is queued after this cluster request.
3) cluster_request_fn() issues a write request, which always fails because of
   one node failure, and it retries forever.
4) cluster_request_done() is never called, so we can't unblock the event list.

This can be reproduced reliably by the following script:
================

for i in `seq 0 7`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
sleep 1
collie/collie cluster format  -c 3
echo create new vdis
(
for i in `seq 0 40`;do
collie/collie vdi create test$i 4M
done
) &

echo kill nodes
sleep 1
for i in 1 2 3 4 5; do pkill -f "sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i";sleep 1;done;

for i in `seq 1 5`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done

echo wait for object recovery to finish
for ((;;)); do
        if [ "$(pgrep collie)" ]; then
                sleep 1
        else
                break
        fi
done
=================

The fix adds leave confchg events to the head of the event list, so they are
not stuck behind a blocked cluster request. Join confchg events are untouched.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 include/list.h           |   27 +++++++++++++++++++++++++++
 sheep/cluster/corosync.c |   36 +++++++++++++++++++++++++++++++++---
 2 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/include/list.h b/include/list.h
index 30ee3c4..c84469d 100644
--- a/include/list.h
+++ b/include/list.h
@@ -54,6 +54,33 @@ static inline int list_empty(const struct list_head *head)
 	     &pos->member != (head); 					\
 	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
 
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)			\
+	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
+	     &pos->member != (head); 	\
+	     pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as temporary storage
+ * @head:	the head for your list.
+ * @member:	the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
+	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
+		n = list_entry(pos->member.prev, typeof(*pos), member);	\
+	     &pos->member != (head); 					\
+	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
 static inline void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 330cb71..27723bb 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -198,8 +198,8 @@ retry:
 	return 0;
 }
 
-static struct corosync_event *find_event(enum corosync_event_type type,
-		struct cpg_node *sender)
+static inline struct corosync_event *find_event(enum corosync_event_type type,
+						struct cpg_node *sender)
 {
 	struct corosync_event *cevent;
 
@@ -212,6 +212,35 @@ static struct corosync_event *find_event(enum corosync_event_type type,
 	return NULL;
 }
 
+static inline struct corosync_event *
+lookup_event_reverse(enum corosync_event_type type)
+{
+	struct corosync_event *cevent;
+
+	list_for_each_entry_reverse(cevent, &corosync_event_list, list) {
+		if (cevent->type == type)
+			return cevent;
+	}
+
+	return NULL;
+}
+
+/*
+ * Queue confchg event @cevent so it is processed ASAP: insert it right after
+ * the last queued event of the same @type (so confchg events keep their
+ * relative order), or at the head of the event list when none is queued.
+ */
+static inline void add_confchg_to_list_head(enum corosync_event_type type,
+					    struct corosync_event *cevent)
+{
+	struct corosync_event *entry = lookup_event_reverse(type);
+
+	if (entry)	/* list_add() links after entry; list_add_tail() is before */
+		list_add(&cevent->list, &entry->list);
+	else
+		list_add(&cevent->list, &corosync_event_list);
+}
+
 static int is_master(struct cpg_node *node)
 {
 	int i;
@@ -561,7 +590,8 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 		cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
 		cevent->sender = left_sheep[i];
 
-		list_add_tail(&cevent->list, &corosync_event_list);
+		/* Leave event would possibly be blocked by cluster request */
+		add_confchg_to_list_head(COROSYNC_EVENT_TYPE_LEAVE, cevent);
 	}
 
 	/* dispatch join_handler */
-- 
1.7.10.2




More information about the sheepdog mailing list