[sheepdog] [PATCH 3/4] sheep: handle block/unblock/notify error
Kai Zhang
kyle at zelin.io
Sat Jul 6 03:59:07 CEST 2013
Currently, group.c doesn't handle errors returned by block/unblock/notify events.
This patch adds a new error code, SD_RES_CLUSTER_ERROR, to indicate these errors.
Signed-off-by: Kai Zhang <kyle at zelin.io>
---
include/sheep.h | 1 +
include/sheepdog_proto.h | 1 +
sheep/cluster.h | 4 ++--
sheep/cluster/corosync.c | 10 +++++-----
sheep/cluster/local.c | 8 ++++++--
sheep/cluster/shepherd.c | 8 +++++---
sheep/cluster/zookeeper.c | 8 ++++----
sheep/group.c | 27 ++++++++++++++++++++++-----
8 files changed, 46 insertions(+), 21 deletions(-)
diff --git a/include/sheep.h b/include/sheep.h
index 0d3fae4..3541012 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -204,6 +204,7 @@ static inline const char *sd_strerror(int err)
[SD_RES_JOIN_FAILED] = "Node has failed to join cluster",
[SD_RES_HALT] = "IO has halted as there are too few living nodes",
[SD_RES_READONLY] = "Object is read-only",
+ [SD_RES_CLUSTER_ERROR] = "Cluster error",
/* from internal_proto.h */
[SD_RES_OLD_NODE_VER] = "Request has an old epoch",
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 156457a..4e9c84e 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -71,6 +71,7 @@
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
#define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
#define SD_RES_READONLY 0x1A /* Object is read-only */
+#define SD_RES_CLUSTER_ERROR 0x1B /* Cluster error */
/* errors above 0x80 are sheepdog-internal */
diff --git a/sheep/cluster.h b/sheep/cluster.h
index a912985..16a09b8 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -93,13 +93,13 @@ struct cluster_driver {
* nodes it needs to call sd_block_handler() on the node where ->block
* was called.
*/
- void (*block)(void);
+ int (*block)(void);
/*
* Unblock events on all nodes, and send a total order message
* to all nodes.
*/
- void (*unblock)(void *msg, size_t msg_len);
+ int (*unblock)(void *msg, size_t msg_len);
/* Update the specific node in the driver's private copy of nodes */
void (*update_node)(struct sd_node *);
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index bf90209..8db12cf 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -704,16 +704,16 @@ static int corosync_leave(void)
NULL, 0);
}
-static void corosync_block(void)
+static int corosync_block(void)
{
- send_message(COROSYNC_MSG_TYPE_BLOCK, 0, &this_node, NULL, 0,
+ return send_message(COROSYNC_MSG_TYPE_BLOCK, 0, &this_node, NULL, 0,
NULL, 0);
}
-static void corosync_unblock(void *msg, size_t msg_len)
+static int corosync_unblock(void *msg, size_t msg_len)
{
- send_message(COROSYNC_MSG_TYPE_UNBLOCK, 0, &this_node, NULL, 0,
- msg, msg_len);
+ return send_message(COROSYNC_MSG_TYPE_UNBLOCK, 0, &this_node, NULL, 0,
+ msg, msg_len);
}
static int corosync_notify(void *msg, size_t msg_len)
diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
index 572aa19..52f4934 100644
--- a/sheep/cluster/local.c
+++ b/sheep/cluster/local.c
@@ -361,16 +361,18 @@ static int local_notify(void *msg, size_t msg_len)
return 0;
}
-static void local_block(void)
+static int local_block(void)
{
shm_queue_lock();
add_event(EVENT_BLOCK, &this_node, NULL, 0);
shm_queue_unlock();
+
+ return 0;
}
-static void local_unblock(void *msg, size_t msg_len)
+static int local_unblock(void *msg, size_t msg_len)
{
struct local_event *ev;
@@ -384,6 +386,8 @@ static void local_unblock(void *msg, size_t msg_len)
add_event(EVENT_NOTIFY, &this_node, msg, msg_len);
shm_queue_unlock();
+
+ return 0;
}
/* Returns true if an event is processed */
diff --git a/sheep/cluster/shepherd.c b/sheep/cluster/shepherd.c
index fba329c..4720513 100644
--- a/sheep/cluster/shepherd.c
+++ b/sheep/cluster/shepherd.c
@@ -641,7 +641,7 @@ static int shepherd_notify(void *msg, size_t msg_len)
return do_shepherd_notify(false, msg, msg_len);
}
-static void shepherd_block(void)
+static int shepherd_block(void)
{
int ret;
struct sph_msg msg;
@@ -654,11 +654,13 @@ static void shepherd_block(void)
sd_eprintf("xwrite() failed: %m");
exit(1);
}
+
+ return 0;
}
-static void shepherd_unblock(void *msg, size_t msg_len)
+static int shepherd_unblock(void *msg, size_t msg_len)
{
- do_shepherd_notify(true, msg, msg_len);
+ return do_shepherd_notify(true, msg, msg_len);
}
/* FIXME: shepherd server also has to udpate node information */
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 29a1237..be3088c 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -824,14 +824,14 @@ static int zk_notify(void *msg, size_t msg_len)
return add_event(EVENT_NOTIFY, &this_node, msg, msg_len);
}
-static void zk_block(void)
+static int zk_block(void)
{
- add_event(EVENT_BLOCK, &this_node, NULL, 0);
+ return add_event(EVENT_BLOCK, &this_node, NULL, 0);
}
-static void zk_unblock(void *msg, size_t msg_len)
+static int zk_unblock(void *msg, size_t msg_len)
{
- add_event(EVENT_UNBLOCK, &this_node, msg, msg_len);
+ return add_event(EVENT_UNBLOCK, &this_node, msg, msg_len);
}
static void zk_handle_join_request(struct zk_event *ev)
diff --git a/sheep/group.c b/sheep/group.c
index 83c3445..2fa4091 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -254,7 +254,16 @@ static void cluster_op_done(struct work *work)
sd_dprintf("%s (%p)", op_name(req->op), req);
msg = prepare_cluster_msg(req, &size);
- sys->cdrv->unblock(msg, size);
+
+ if (sys->cdrv->unblock(msg, size) != 0) {
+ /*
+ * Failed to unblock, shoot myself to let other sheep
+ * unblock the event.
+ * FIXME: handle it gracefully.
+ */
+ sd_eprintf("Failed to unblock, exiting.");
+ exit(1);
+ }
free(msg);
}
@@ -301,22 +310,30 @@ void queue_cluster_request(struct request *req)
sd_dprintf("%s (%p)", op_name(req->op), req);
if (has_process_work(req->op)) {
+ if (sys->cdrv->block() != 0)
+ goto error;
list_add_tail(&req->pending_list,
main_thread_get(pending_block_list));
- sys->cdrv->block();
+
} else {
struct vdi_op_message *msg;
size_t size;
msg = prepare_cluster_msg(req, &size);
+ msg->rsp.result = SD_RES_SUCCESS;
+ if (sys->cdrv->notify(msg, size) != 0)
+ goto error;
+
list_add_tail(&req->pending_list,
main_thread_get(pending_notify_list));
- msg->rsp.result = SD_RES_SUCCESS;
- sys->cdrv->notify(msg, size);
-
free(msg);
}
+
+ return;
+error:
+ req->rp.result = SD_RES_CLUSTER_ERROR;
+ put_request(req);
}
static inline int get_nodes_nr_from(struct list_head *l)
--
1.7.9.5
More information about the sheepdog
mailing list