[sheepdog] [PATCH v4] corosync: fix cluster hang by cluster requests blocking confchg
Liu Yuan
namei.unix at gmail.com
Thu Jul 5 18:46:52 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
v4: use separate lists for confchg and notify events
--------------------------------------------------------- >8
This hang is caused by a cluster request (adding a new vdi):
1) the cluster request blocks the cluster and waits for its worker to finish.
2) a confchg happens, but is queued after this cluster request.
3) cluster_request_fn() issues a write request, which always fails because one
node has failed, and retries forever.
4) cluster_request_done() is never called, so we can't unblock the event list.
The fix is to use separate lists for notify and confchg events.
This can be reproduced reliably by the following script:
================
for i in `seq 0 7`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
sleep 1
collie/collie cluster format -c 3
echo create new vdis
(
for i in `seq 0 40`;do
collie/collie vdi create test$i 4M
done
) &
echo kill nodes
sleep 1
for i in 1 2 3 4 5; do pkill -f "sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i";sleep 1;done;
for i in `seq 1 5`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
echo wait for object recovery to finish
for ((;;)); do
if [ "$(pgrep collie)" ]; then
sleep 1
else
break
fi
done
=================
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/cluster/corosync.c | 64 +++++++++++++++++++++++++++++++++++++---------
1 file changed, 52 insertions(+), 12 deletions(-)
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 330cb71..aec1c5c 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -31,7 +31,8 @@ static struct cpg_name cpg_group = { 8, "sheepdog" };
static corosync_cfg_handle_t cfg_handle;
static struct cpg_node this_node;
-static LIST_HEAD(corosync_event_list);
+static LIST_HEAD(corosync_notify_list);
+static LIST_HEAD(corosync_confchg_list);
static struct cpg_node cpg_nodes[SD_MAX_NODES];
static size_t nr_cpg_nodes;
@@ -198,12 +199,12 @@ retry:
return 0;
}
-static struct corosync_event *find_event(enum corosync_event_type type,
- struct cpg_node *sender)
+static inline struct corosync_event *
+find_notify_event(enum corosync_event_type type, struct cpg_node *sender)
{
struct corosync_event *cevent;
- list_for_each_entry(cevent, &corosync_event_list, list) {
+ list_for_each_entry(cevent, &corosync_notify_list, list) {
if (cevent->type == type &&
cpg_node_equal(&cevent->sender, sender))
return cevent;
@@ -212,6 +213,38 @@ static struct corosync_event *find_event(enum corosync_event_type type,
return NULL;
}
+static inline struct corosync_event *
+find_confchg_event(enum corosync_event_type type, struct cpg_node *sender)
+{
+ struct corosync_event *cevent;
+
+ list_for_each_entry(cevent, &corosync_confchg_list, list) {
+ if (cevent->type == type &&
+ cpg_node_equal(&cevent->sender, sender))
+ return cevent;
+ }
+
+ return NULL;
+}
+
+static inline bool event_is_confchg(enum corosync_event_type type)
+{
+ if (type == COROSYNC_EVENT_TYPE_BLOCK ||
+ type == COROSYNC_MSG_TYPE_NOTIFY)
+ return false;
+
+ return true;
+}
+
+static inline struct corosync_event *
+find_event(enum corosync_event_type type, struct cpg_node *sender)
+{
+ if (event_is_confchg(type))
+ return find_confchg_event(type, sender);
+ else
+ return find_notify_event(type, sender);
+}
+
static int is_master(struct cpg_node *node)
{
int i;
@@ -326,8 +359,14 @@ static void __corosync_dispatch(void)
{
struct corosync_event *cevent;
- while (!list_empty(&corosync_event_list)) {
- cevent = list_first_entry(&corosync_event_list, typeof(*cevent), list);
+ while (!list_empty(&corosync_notify_list) ||
+ !list_empty(&corosync_confchg_list)) {
+ if (!list_empty(&corosync_confchg_list))
+ cevent = list_first_entry(&corosync_confchg_list,
+ typeof(*cevent), list);
+ else
+ cevent = list_first_entry(&corosync_notify_list,
+ typeof(*cevent), list);
/* update join status */
if (!join_finished) {
@@ -372,8 +411,9 @@ static void __corosync_dispatch(void)
}
}
-static struct corosync_event *update_event(enum corosync_event_type type,
- struct cpg_node *sender, void *msg, size_t msg_len)
+static struct corosync_event *
+update_event(enum corosync_event_type type, struct cpg_node *sender, void *msg,
+ size_t msg_len)
{
struct corosync_event *cevent;
@@ -439,7 +479,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
cevent->msg = NULL;
- list_add_tail(&cevent->list, &corosync_event_list);
+ list_add_tail(&cevent->list, &corosync_notify_list);
break;
case COROSYNC_MSG_TYPE_LEAVE:
cevent = zalloc(sizeof(*cevent));
@@ -462,7 +502,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
} else
cevent->msg = NULL;
- list_add_tail(&cevent->list, &corosync_event_list);
+ list_add_tail(&cevent->list, &corosync_confchg_list);
break;
case COROSYNC_MSG_TYPE_JOIN_RESPONSE:
cevent = update_event(COROSYNC_EVENT_TYPE_JOIN_REQUEST,
@@ -561,7 +601,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
cevent->sender = left_sheep[i];
- list_add_tail(&cevent->list, &corosync_event_list);
+ list_add_tail(&cevent->list, &corosync_confchg_list);
}
/* dispatch join_handler */
@@ -572,7 +612,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
cevent->type = COROSYNC_EVENT_TYPE_JOIN_REQUEST;
cevent->sender = joined_sheep[i];
- list_add_tail(&cevent->list, &corosync_event_list);
+ list_add_tail(&cevent->list, &corosync_confchg_list);
}
if (!join_finished) {
--
1.7.10.2
More information about the sheepdog
mailing list