[sheepdog] [PATCH 3/3] sheep: split notification messages into two queues

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Thu Aug 30 02:36:17 CEST 2012


Currently, the corosync driver uses two queues, confchg_list and
notify_list.  However, this causes problems because some messages
(e.g. format, shutdown, cluster snapshot, ...) need to be ordered
with respect to confchg events.

This patch splits multicast messages into two queues, block_list and
nonblock_list.  All block messages (e.g. vdi creation) are linked
into the block_list queue, and other notification messages and confchg
events are linked into the nonblock_list queue.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/cluster.h          |    3 +-
 sheep/cluster/corosync.c |   54 ++++++++++++++++++++++-----------------------
 sheep/group.c            |   17 +++++++++-----
 sheep/sheep_priv.h       |    3 +-
 4 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/sheep/cluster.h b/sheep/cluster.h
index 75596a8..a93bdc3 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -86,7 +86,8 @@ struct cluster_driver {
 	 * Notify a message to all nodes in the cluster
 	 *
 	 * This function sends 'msg' to all the nodes.  The notified messages
-	 * can be read through sd_notify_handler().
+	 * can be read through sd_notify_handler() and are totally ordered with
+	 * node change events.
 	 *
 	 * Returns zero on success, -1 on error
 	 */
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 02130f3..a5533b5 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -31,8 +31,8 @@ static struct cpg_name cpg_group = { 8, "sheepdog" };
 static corosync_cfg_handle_t cfg_handle;
 static struct cpg_node this_node;
 
-static LIST_HEAD(corosync_notify_list);
-static LIST_HEAD(corosync_confchg_list);
+static LIST_HEAD(corosync_block_event_list);
+static LIST_HEAD(corosync_nonblock_event_list);
 
 static struct cpg_node cpg_nodes[SD_MAX_NODES];
 static size_t nr_cpg_nodes;
@@ -200,11 +200,11 @@ retry:
 }
 
 static inline struct corosync_event *
-find_notify_event(enum corosync_event_type type, struct cpg_node *sender)
+find_block_event(enum corosync_event_type type, struct cpg_node *sender)
 {
 	struct corosync_event *cevent;
 
-	list_for_each_entry(cevent, &corosync_notify_list, list) {
+	list_for_each_entry(cevent, &corosync_block_event_list, list) {
 		if (cevent->type == type &&
 		    cpg_node_equal(&cevent->sender, sender))
 			return cevent;
@@ -214,11 +214,11 @@ find_notify_event(enum corosync_event_type type, struct cpg_node *sender)
 }
 
 static inline struct corosync_event *
-find_confchg_event(enum corosync_event_type type, struct cpg_node *sender)
+find_nonblock_event(enum corosync_event_type type, struct cpg_node *sender)
 {
 	struct corosync_event *cevent;
 
-	list_for_each_entry(cevent, &corosync_confchg_list, list) {
+	list_for_each_entry(cevent, &corosync_nonblock_event_list, list) {
 		if (cevent->type == type &&
 		    cpg_node_equal(&cevent->sender, sender))
 			return cevent;
@@ -227,22 +227,13 @@ find_confchg_event(enum corosync_event_type type, struct cpg_node *sender)
 	return NULL;
 }
 
-static inline bool event_is_confchg(enum corosync_event_type type)
-{
-	if (type == COROSYNC_EVENT_TYPE_BLOCK ||
-	    type == COROSYNC_EVENT_TYPE_NOTIFY)
-		return false;
-
-	return true;
-}
-
 static inline struct corosync_event *
 find_event(enum corosync_event_type type, struct cpg_node *sender)
 {
-	if (event_is_confchg(type))
-		return find_confchg_event(type, sender);
+	if (type == COROSYNC_EVENT_TYPE_BLOCK)
+		return find_block_event(type, sender);
 	else
-		return find_notify_event(type, sender);
+		return find_nonblock_event(type, sender);
 }
 
 static int is_master(struct cpg_node *node)
@@ -365,13 +356,13 @@ static void __corosync_dispatch(void)
 {
 	struct corosync_event *cevent;
 
-	while (!list_empty(&corosync_notify_list) ||
-	       !list_empty(&corosync_confchg_list)) {
-		if (!list_empty(&corosync_confchg_list))
-			cevent = list_first_entry(&corosync_confchg_list,
+	while (!list_empty(&corosync_block_event_list) ||
+	       !list_empty(&corosync_nonblock_event_list)) {
+		if (!list_empty(&corosync_nonblock_event_list))
+			cevent = list_first_entry(&corosync_nonblock_event_list,
 						  typeof(*cevent), list);
 		else
-			cevent = list_first_entry(&corosync_notify_list,
+			cevent = list_first_entry(&corosync_block_event_list,
 						  typeof(*cevent), list);
 
 		/* update join status */
@@ -442,6 +433,14 @@ update_event(enum corosync_event_type type, struct cpg_node *sender, void *msg,
 	return cevent;
 }
 
+static void queue_event(struct corosync_event *cevent)
+{
+	if (cevent->type == COROSYNC_EVENT_TYPE_BLOCK)
+		list_add_tail(&cevent->list, &corosync_block_event_list);
+	else
+		list_add_tail(&cevent->list, &corosync_nonblock_event_list);
+}
+
 static void cdrv_cpg_deliver(cpg_handle_t handle,
 			     const struct cpg_name *group_name,
 			     uint32_t nodeid, uint32_t pid,
@@ -484,8 +483,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
 		} else
 			cevent->msg = NULL;
 
-
-		list_add_tail(&cevent->list, &corosync_notify_list);
+		queue_event(cevent);
 		break;
 	case COROSYNC_MSG_TYPE_LEAVE:
 		cevent = zalloc(sizeof(*cevent));
@@ -508,7 +506,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
 		} else
 			cevent->msg = NULL;
 
-		list_add_tail(&cevent->list, &corosync_confchg_list);
+		queue_event(cevent);
 		break;
 	case COROSYNC_MSG_TYPE_JOIN_RESPONSE:
 		cevent = update_event(COROSYNC_EVENT_TYPE_JOIN_REQUEST,
@@ -607,7 +605,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 		cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
 		cevent->sender = left_sheep[i];
 
-		list_add_tail(&cevent->list, &corosync_confchg_list);
+		queue_event(cevent);
 	}
 
 	/* dispatch join_handler */
@@ -618,7 +616,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 
 		cevent->type = COROSYNC_EVENT_TYPE_JOIN_REQUEST;
 		cevent->sender = joined_sheep[i];
-		list_add_tail(&cevent->list, &corosync_confchg_list);
+		queue_event(cevent);
 	}
 
 	if (!join_finished) {
diff --git a/sheep/group.c b/sheep/group.c
index 3571b96..b05ff3e 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -312,7 +312,7 @@ bool sd_block_handler(struct sd_node *sender)
 
 	cluster_op_running = true;
 
-	req = list_first_entry(&sys->pending_list,
+	req = list_first_entry(&sys->pending_block_list,
 				struct request, pending_list);
 	req->work.fn = do_process_work;
 	req->work.done = cluster_op_done;
@@ -333,7 +333,7 @@ void queue_cluster_request(struct request *req)
 	eprintf("%s (%p)\n", op_name(req->op), req);
 
 	if (has_process_work(req->op)) {
-		list_add_tail(&req->pending_list, &sys->pending_list);
+		list_add_tail(&req->pending_list, &sys->pending_block_list);
 		sys->cdrv->block();
 	} else {
 		struct vdi_op_message *msg;
@@ -343,7 +343,7 @@ void queue_cluster_request(struct request *req)
 		if (!msg)
 			return;
 
-		list_add_tail(&req->pending_list, &sys->pending_list);
+		list_add_tail(&req->pending_list, &sys->pending_notify_list);
 
 		msg->rsp.result = SD_RES_SUCCESS;
 		sys->cdrv->notify(msg, size);
@@ -921,8 +921,12 @@ void sd_notify_handler(struct sd_node *sender, void *data, size_t data_len)
 		op_name(op), data_len, node_to_str(sender));
 
 	if (is_myself(sender->nid.addr, sender->nid.port)) {
-		req = list_first_entry(&sys->pending_list, struct request,
-				       pending_list);
+		if (has_process_work(op))
+			req = list_first_entry(&sys->pending_block_list,
+					       struct request, pending_list);
+		else
+			req = list_first_entry(&sys->pending_notify_list,
+					       struct request, pending_list);
 		list_del(&req->pending_list);
 	}
 
@@ -1237,7 +1241,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
 		sys->status = SD_STATUS_WAIT_FOR_FORMAT;
 	}
 
-	INIT_LIST_HEAD(&sys->pending_list);
+	INIT_LIST_HEAD(&sys->pending_block_list);
+	INIT_LIST_HEAD(&sys->pending_notify_list);
 	INIT_LIST_HEAD(&sys->failed_nodes);
 	INIT_LIST_HEAD(&sys->delayed_nodes);
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 90006f6..2def5b3 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -91,7 +91,8 @@ struct cluster_info {
 	 */
 	struct list_head delayed_nodes;
 
-	struct list_head pending_list;
+	struct list_head pending_block_list;
+	struct list_head pending_notify_list;
 
 	DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
 
-- 
1.7.2.5




More information about the sheepdog mailing list