[sheepdog] [PATCH] corosync: fix cluster hang by cluster requests blocking confchg
Liu Yuan
namei.unix at gmail.com
Thu Jul 5 10:09:44 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
This hang is caused by a cluster request (adding a new vdi):
1) The cluster request blocks the cluster and waits for its worker to finish.
2) A confchg happens, but it is queued after this cluster request.
3) cluster_request_fn() issues a write request that always fails because of a
node failure, so it retries forever.
4) cluster_request_done() is never called, so we can't unblock the event list.
This can be reproduced reliably with the following script:
================
for i in `seq 0 7`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
sleep 1
collie/collie cluster format -c 3
echo create new vdis
(
for i in `seq 0 40`;do
collie/collie vdi create test$i 4M
done
) &
echo kill nodes
sleep 1
for i in 1 2 3 4 5; do pkill -f "sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i";sleep 1;done;
for i in `seq 1 5`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
echo wait for object recovery to finish
for ((;;)); do
if [ "$(pgrep collie)" ]; then
sleep 1
else
break
fi
done
=================
The fix adds confchg events to the head of the event list, so that they are
processed ASAP instead of waiting behind a blocked cluster request.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
include/list.h | 27 +++++++++++++++++++++++++++
sheep/cluster/corosync.c | 39 +++++++++++++++++++++++++++++++++++----
2 files changed, 62 insertions(+), 4 deletions(-)
diff --git a/include/list.h b/include/list.h
index 30ee3c4..c84469d 100644
--- a/include/list.h
+++ b/include/list.h
@@ -54,6 +54,33 @@ static inline int list_empty(const struct list_head *head)
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * The walk starts at the entry containing (head)->prev and follows the
+ * prev links until the cursor's @member is @head itself again, so an
+ * empty list runs the body zero times.
+ *
+ * The next cursor is read from @pos after the loop body has run, so the
+ * body must not unlink or free @pos; use list_for_each_entry_safe_reverse
+ * when entries may be removed while iterating.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage; it always holds the
+ *     entry that precedes @pos (the one visited next).
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ *
+ * The predecessor of @pos is saved in @n before the loop body runs and the
+ * cursor is advanced from @n rather than from @pos, so the body may unlink
+ * @pos. The body must not remove the entry held in @n.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 330cb71..55a19fe 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -198,8 +198,8 @@ retry:
return 0;
}
-static struct corosync_event *find_event(enum corosync_event_type type,
- struct cpg_node *sender)
+static inline struct corosync_event *find_event(enum corosync_event_type type,
+ struct cpg_node *sender)
{
struct corosync_event *cevent;
@@ -212,6 +212,36 @@ static struct corosync_event *find_event(enum corosync_event_type type,
return NULL;
}
+/*
+ * Search corosync_event_list from its tail towards its head and return the
+ * first event whose type equals @type, i.e. the one closest to the tail.
+ * Returns NULL when no event of that type is queued.
+ */
+static inline struct corosync_event *
+lookup_event_reverse(enum corosync_event_type type)
+{
+	struct corosync_event *ev;
+
+	list_for_each_entry_reverse(ev, &corosync_event_list, list)
+		if (ev->type == type)
+			return ev;
+
+	return NULL;
+}
+
+/*
+ * Add a confchg event (leave or join request) to the event list.
+ *
+ * Confchg events must be processed ASAP, so they are queued ahead of the
+ * other pending events instead of at the tail:
+ *  - if an event of the same @type is already queued, @cevent is linked
+ *    right after the last such event, which keeps confchg events of that
+ *    type in arrival order;
+ *  - otherwise @cevent becomes the new head of the list.
+ *
+ * @type:   type of @cevent, used to find the previously queued confchg
+ * @cevent: the event to queue; its ->list member is linked into
+ *          corosync_event_list
+ *
+ * NOTE(review): only events of the same @type are looked up, so the
+ * relative order between LEAVE and JOIN_REQUEST events is not preserved
+ * by this helper - confirm that this is acceptable for the callers.
+ */
+static inline void add_confchg_to_event_list(enum corosync_event_type type,
+					     struct corosync_event *cevent)
+{
+	struct corosync_event *entry = lookup_event_reverse(type);
+
+	if (entry) {
+		/*
+		 * list_add() links the new node right after the given node.
+		 * list_add_tail() would link it *before* @entry and thereby
+		 * reverse the order of same-type confchg events.
+		 */
+		list_add(&cevent->list, &entry->list);
+	} else {
+		list_add(&cevent->list, &corosync_event_list);
+	}
+}
+
static int is_master(struct cpg_node *node)
{
int i;
@@ -561,7 +591,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
cevent->sender = left_sheep[i];
- list_add_tail(&cevent->list, &corosync_event_list);
+ add_confchg_to_event_list(COROSYNC_EVENT_TYPE_LEAVE, cevent);
}
/* dispatch join_handler */
@@ -572,7 +602,8 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
cevent->type = COROSYNC_EVENT_TYPE_JOIN_REQUEST;
cevent->sender = joined_sheep[i];
- list_add_tail(&cevent->list, &corosync_event_list);
+ add_confchg_to_event_list(COROSYNC_EVENT_TYPE_JOIN_REQUEST,
+ cevent);
}
if (!join_finished) {
--
1.7.10.2
More information about the sheepdog
mailing list