[sheepdog] [PATCH 3/3] sheep: split notification messages into two queues
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu Aug 30 02:36:17 CEST 2012
Currently, the corosync driver uses two queues, confchg_list and
notify_list. However, this causes problems because some messages
(e.g. format, shutdown, cluster snapshot, ...) need to be ordered
with confchg events.
This patch splits multicast messages into two queues, block_list and
nonblock_list. All block messages (e.g. vdi creation) are linked
into the block_list queue, while other notification messages and confchg
events are linked into nonblock_list.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
sheep/cluster.h | 3 +-
sheep/cluster/corosync.c | 54 ++++++++++++++++++++++-----------------------
sheep/group.c | 17 +++++++++-----
sheep/sheep_priv.h | 3 +-
4 files changed, 41 insertions(+), 36 deletions(-)
diff --git a/sheep/cluster.h b/sheep/cluster.h
index 75596a8..a93bdc3 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -86,7 +86,8 @@ struct cluster_driver {
* Notify a message to all nodes in the cluster
*
* This function sends 'msg' to all the nodes. The notified messages
- * can be read through sd_notify_handler().
+ * can be read through sd_notify_handler() and totally ordered with
+ * node change events.
*
* Returns zero on success, -1 on error
*/
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 02130f3..a5533b5 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -31,8 +31,8 @@ static struct cpg_name cpg_group = { 8, "sheepdog" };
static corosync_cfg_handle_t cfg_handle;
static struct cpg_node this_node;
-static LIST_HEAD(corosync_notify_list);
-static LIST_HEAD(corosync_confchg_list);
+static LIST_HEAD(corosync_block_event_list);
+static LIST_HEAD(corosync_nonblock_event_list);
static struct cpg_node cpg_nodes[SD_MAX_NODES];
static size_t nr_cpg_nodes;
@@ -200,11 +200,11 @@ retry:
}
static inline struct corosync_event *
-find_notify_event(enum corosync_event_type type, struct cpg_node *sender)
+find_block_event(enum corosync_event_type type, struct cpg_node *sender)
{
struct corosync_event *cevent;
- list_for_each_entry(cevent, &corosync_notify_list, list) {
+ list_for_each_entry(cevent, &corosync_block_event_list, list) {
if (cevent->type == type &&
cpg_node_equal(&cevent->sender, sender))
return cevent;
@@ -214,11 +214,11 @@ find_notify_event(enum corosync_event_type type, struct cpg_node *sender)
}
static inline struct corosync_event *
-find_confchg_event(enum corosync_event_type type, struct cpg_node *sender)
+find_nonblock_event(enum corosync_event_type type, struct cpg_node *sender)
{
struct corosync_event *cevent;
- list_for_each_entry(cevent, &corosync_confchg_list, list) {
+ list_for_each_entry(cevent, &corosync_nonblock_event_list, list) {
if (cevent->type == type &&
cpg_node_equal(&cevent->sender, sender))
return cevent;
@@ -227,22 +227,13 @@ find_confchg_event(enum corosync_event_type type, struct cpg_node *sender)
return NULL;
}
-static inline bool event_is_confchg(enum corosync_event_type type)
-{
- if (type == COROSYNC_EVENT_TYPE_BLOCK ||
- type == COROSYNC_EVENT_TYPE_NOTIFY)
- return false;
-
- return true;
-}
-
static inline struct corosync_event *
find_event(enum corosync_event_type type, struct cpg_node *sender)
{
- if (event_is_confchg(type))
- return find_confchg_event(type, sender);
+ if (type == COROSYNC_EVENT_TYPE_BLOCK)
+ return find_block_event(type, sender);
else
- return find_notify_event(type, sender);
+ return find_nonblock_event(type, sender);
}
static int is_master(struct cpg_node *node)
@@ -365,13 +356,13 @@ static void __corosync_dispatch(void)
{
struct corosync_event *cevent;
- while (!list_empty(&corosync_notify_list) ||
- !list_empty(&corosync_confchg_list)) {
- if (!list_empty(&corosync_confchg_list))
- cevent = list_first_entry(&corosync_confchg_list,
+ while (!list_empty(&corosync_block_event_list) ||
+ !list_empty(&corosync_nonblock_event_list)) {
+ if (!list_empty(&corosync_nonblock_event_list))
+ cevent = list_first_entry(&corosync_nonblock_event_list,
typeof(*cevent), list);
else
- cevent = list_first_entry(&corosync_notify_list,
+ cevent = list_first_entry(&corosync_block_event_list,
typeof(*cevent), list);
/* update join status */
@@ -442,6 +433,14 @@ update_event(enum corosync_event_type type, struct cpg_node *sender, void *msg,
return cevent;
}
+static void queue_event(struct corosync_event *cevent)
+{
+ if (cevent->type == COROSYNC_EVENT_TYPE_BLOCK)
+ list_add_tail(&cevent->list, &corosync_block_event_list);
+ else
+ list_add_tail(&cevent->list, &corosync_nonblock_event_list);
+}
+
static void cdrv_cpg_deliver(cpg_handle_t handle,
const struct cpg_name *group_name,
uint32_t nodeid, uint32_t pid,
@@ -484,8 +483,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
} else
cevent->msg = NULL;
-
- list_add_tail(&cevent->list, &corosync_notify_list);
+ queue_event(cevent);
break;
case COROSYNC_MSG_TYPE_LEAVE:
cevent = zalloc(sizeof(*cevent));
@@ -508,7 +506,7 @@ static void cdrv_cpg_deliver(cpg_handle_t handle,
} else
cevent->msg = NULL;
- list_add_tail(&cevent->list, &corosync_confchg_list);
+ queue_event(cevent);
break;
case COROSYNC_MSG_TYPE_JOIN_RESPONSE:
cevent = update_event(COROSYNC_EVENT_TYPE_JOIN_REQUEST,
@@ -607,7 +605,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
cevent->sender = left_sheep[i];
- list_add_tail(&cevent->list, &corosync_confchg_list);
+ queue_event(cevent);
}
/* dispatch join_handler */
@@ -618,7 +616,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
cevent->type = COROSYNC_EVENT_TYPE_JOIN_REQUEST;
cevent->sender = joined_sheep[i];
- list_add_tail(&cevent->list, &corosync_confchg_list);
+ queue_event(cevent);
}
if (!join_finished) {
diff --git a/sheep/group.c b/sheep/group.c
index 3571b96..b05ff3e 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -312,7 +312,7 @@ bool sd_block_handler(struct sd_node *sender)
cluster_op_running = true;
- req = list_first_entry(&sys->pending_list,
+ req = list_first_entry(&sys->pending_block_list,
struct request, pending_list);
req->work.fn = do_process_work;
req->work.done = cluster_op_done;
@@ -333,7 +333,7 @@ void queue_cluster_request(struct request *req)
eprintf("%s (%p)\n", op_name(req->op), req);
if (has_process_work(req->op)) {
- list_add_tail(&req->pending_list, &sys->pending_list);
+ list_add_tail(&req->pending_list, &sys->pending_block_list);
sys->cdrv->block();
} else {
struct vdi_op_message *msg;
@@ -343,7 +343,7 @@ void queue_cluster_request(struct request *req)
if (!msg)
return;
- list_add_tail(&req->pending_list, &sys->pending_list);
+ list_add_tail(&req->pending_list, &sys->pending_notify_list);
msg->rsp.result = SD_RES_SUCCESS;
sys->cdrv->notify(msg, size);
@@ -921,8 +921,12 @@ void sd_notify_handler(struct sd_node *sender, void *data, size_t data_len)
op_name(op), data_len, node_to_str(sender));
if (is_myself(sender->nid.addr, sender->nid.port)) {
- req = list_first_entry(&sys->pending_list, struct request,
- pending_list);
+ if (has_process_work(op))
+ req = list_first_entry(&sys->pending_block_list,
+ struct request, pending_list);
+ else
+ req = list_first_entry(&sys->pending_notify_list,
+ struct request, pending_list);
list_del(&req->pending_list);
}
@@ -1237,7 +1241,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
sys->status = SD_STATUS_WAIT_FOR_FORMAT;
}
- INIT_LIST_HEAD(&sys->pending_list);
+ INIT_LIST_HEAD(&sys->pending_block_list);
+ INIT_LIST_HEAD(&sys->pending_notify_list);
INIT_LIST_HEAD(&sys->failed_nodes);
INIT_LIST_HEAD(&sys->delayed_nodes);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 90006f6..2def5b3 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -91,7 +91,8 @@ struct cluster_info {
*/
struct list_head delayed_nodes;
- struct list_head pending_list;
+ struct list_head pending_block_list;
+ struct list_head pending_notify_list;
DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
--
1.7.2.5
More information about the sheepdog
mailing list