[sheepdog] [PATCH v3 1/2] sheep: handle block/unblock/notify error

Kai Zhang kyle at zelin.io
Tue Jul 9 06:07:14 CEST 2013


group.c uses three broadcast operations: block, unblock and notify.
These broadcast operations are implemented by the cluster drivers.
For example, corosync implements them with cpg_mcast_joined(), while zookeeper
uses sequential nodes.
They can fail if the network is unavailable for a while.

However, the current group.c doesn't handle errors from block/unblock/notify
events and just ignores them.

This patch adds a new error code, SD_RES_CLUSTER_ERROR, to indicate these errors.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 include/sheep.h           |    1 +
 include/sheepdog_proto.h  |    1 +
 sheep/cluster.h           |   10 +++++++---
 sheep/cluster/corosync.c  |   17 ++++++++++-------
 sheep/cluster/local.c     |   10 +++++++---
 sheep/cluster/shepherd.c  |   12 ++++++++----
 sheep/cluster/zookeeper.c |   13 ++++++++-----
 sheep/group.c             |   40 +++++++++++++++++++++++++++++++++++-----
 8 files changed, 77 insertions(+), 27 deletions(-)

diff --git a/include/sheep.h b/include/sheep.h
index 0d3fae4..3541012 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -204,6 +204,7 @@ static inline const char *sd_strerror(int err)
 		[SD_RES_JOIN_FAILED] = "Node has failed to join cluster",
 		[SD_RES_HALT] = "IO has halted as there are too few living nodes",
 		[SD_RES_READONLY] = "Object is read-only",
+		[SD_RES_CLUSTER_ERROR] = "Cluster error",
 
 		/* from internal_proto.h */
 		[SD_RES_OLD_NODE_VER] = "Request has an old epoch",
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 156457a..4e9c84e 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -71,6 +71,7 @@
 #define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
 #define SD_RES_HALT          0x19 /* Sheepdog is stopped doing IO */
 #define SD_RES_READONLY      0x1A /* Object is read-only */
+#define SD_RES_CLUSTER_ERROR 0x1B /* Cluster error */
 
 /* errors above 0x80 are sheepdog-internal */
 
diff --git a/sheep/cluster.h b/sheep/cluster.h
index a912985..4851290 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -82,7 +82,7 @@ struct cluster_driver {
 	 * can be read through sd_notify_handler() and totally ordered with
 	 * node change events.
 	 *
-	 * Returns zero on success, -1 on error
+	 * Returns SD_RES_XXX
 	 */
 	int (*notify)(void *msg, size_t msg_len);
 
@@ -92,14 +92,18 @@ struct cluster_driver {
 	 * Once the cluster driver has ensured that events are blocked on all
 	 * nodes it needs to call sd_block_handler() on the node where ->block
 	 * was called.
+	 *
+	 * Returns SD_RES_XXX
 	 */
-	void (*block)(void);
+	int (*block)(void);
 
 	/*
 	 * Unblock events on all nodes, and send a total order message
 	 * to all nodes.
+	 *
+	 * Returns SD_RES_XXX
 	 */
-	void (*unblock)(void *msg, size_t msg_len);
+	int (*unblock)(void *msg, size_t msg_len);
 
 	/* Update the specific node in the driver's private copy of nodes */
 	void (*update_node)(struct sd_node *);
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index bf90209..56c4737 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -704,22 +704,25 @@ static int corosync_leave(void)
 			    NULL, 0);
 }
 
-static void corosync_block(void)
+static int corosync_block(void)
 {
-	send_message(COROSYNC_MSG_TYPE_BLOCK, 0, &this_node, NULL, 0,
-			    NULL, 0);
+	return send_message(COROSYNC_MSG_TYPE_BLOCK, 0, &this_node, NULL, 0,
+			    NULL, 0) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
-static void corosync_unblock(void *msg, size_t msg_len)
+static int corosync_unblock(void *msg, size_t msg_len)
 {
-	send_message(COROSYNC_MSG_TYPE_UNBLOCK, 0, &this_node, NULL, 0,
-		     msg, msg_len);
+	return send_message(COROSYNC_MSG_TYPE_UNBLOCK, 0, &this_node, NULL, 0,
+			    msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
 static int corosync_notify(void *msg, size_t msg_len)
 {
 	return send_message(COROSYNC_MSG_TYPE_NOTIFY, 0, &this_node,
-			   NULL, 0, msg, msg_len);
+			   NULL, 0, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
 static void corosync_handler(int listen_fd, int events, void *data)
diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
index 307a69e..e6fa149 100644
--- a/sheep/cluster/local.c
+++ b/sheep/cluster/local.c
@@ -358,19 +358,21 @@ static int local_notify(void *msg, size_t msg_len)
 
 	shm_queue_unlock();
 
-	return 0;
+	return SD_RES_SUCCESS;
 }
 
-static void local_block(void)
+static int local_block(void)
 {
 	shm_queue_lock();
 
 	add_event(EVENT_BLOCK, &this_node, NULL, 0);
 
 	shm_queue_unlock();
+
+	return SD_RES_SUCCESS;
 }
 
-static void local_unblock(void *msg, size_t msg_len)
+static int local_unblock(void *msg, size_t msg_len)
 {
 	struct local_event *ev;
 
@@ -384,6 +386,8 @@ static void local_unblock(void *msg, size_t msg_len)
 	add_event(EVENT_NOTIFY, &this_node, msg, msg_len);
 
 	shm_queue_unlock();
+
+	return SD_RES_SUCCESS;
 }
 
 /* Returns true if an event is processed */
diff --git a/sheep/cluster/shepherd.c b/sheep/cluster/shepherd.c
index fba329c..df8737f 100644
--- a/sheep/cluster/shepherd.c
+++ b/sheep/cluster/shepherd.c
@@ -638,10 +638,11 @@ static int do_shepherd_notify(bool unblock, void *msg, size_t msg_len)
 
 static int shepherd_notify(void *msg, size_t msg_len)
 {
-	return do_shepherd_notify(false, msg, msg_len);
+	return do_shepherd_notify(false, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
-static void shepherd_block(void)
+static int shepherd_block(void)
 {
 	int ret;
 	struct sph_msg msg;
@@ -654,11 +655,14 @@ static void shepherd_block(void)
 		sd_eprintf("xwrite() failed: %m");
 		exit(1);
 	}
+
+	return SD_RES_SUCCESS;
 }
 
-static void shepherd_unblock(void *msg, size_t msg_len)
+static int shepherd_unblock(void *msg, size_t msg_len)
 {
-	do_shepherd_notify(true, msg, msg_len);
+	return do_shepherd_notify(true, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
 /* FIXME: shepherd server also has to udpate node information */
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 0ac1677..f9f9511 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -823,17 +823,20 @@ static int zk_leave(void)
 
 static int zk_notify(void *msg, size_t msg_len)
 {
-	return add_event(EVENT_NOTIFY, &this_node, msg, msg_len);
+	return add_event(EVENT_NOTIFY, &this_node, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
-static void zk_block(void)
+static int zk_block(void)
 {
-	add_event(EVENT_BLOCK, &this_node, NULL, 0);
+	return add_event(EVENT_BLOCK, &this_node, NULL, 0) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
-static void zk_unblock(void *msg, size_t msg_len)
+static int zk_unblock(void *msg, size_t msg_len)
 {
-	add_event(EVENT_UNBLOCK, &this_node, msg, msg_len);
+	return add_event(EVENT_UNBLOCK, &this_node, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
 static void zk_handle_join_request(struct zk_event *ev)
diff --git a/sheep/group.c b/sheep/group.c
index 6e01a8d..e5585b6 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -232,11 +232,23 @@ static void cluster_op_done(struct work *work)
 	struct request *req = container_of(work, struct request, work);
 	struct vdi_op_message *msg;
 	size_t size;
+	int ret;
 
 	sd_dprintf("%s (%p)", op_name(req->op), req);
 
 	msg = prepare_cluster_msg(req, &size);
-	sys->cdrv->unblock(msg, size);
+
+	ret = sys->cdrv->unblock(msg, size);
+	if (ret != SD_RES_SUCCESS) {
+		/*
+		 * Failed to unblock, shoot myself to let other sheep
+		 * unblock the event.
+		 * FIXME: handle it gracefully.
+		 */
+		sd_printf(SDOG_EMERG, "Failed to unblock, %s, exiting.",
+			  sd_strerror(ret));
+		exit(1);
+	}
 
 	free(msg);
 }
@@ -280,25 +292,43 @@ bool sd_block_handler(const struct sd_node *sender)
  */
 void queue_cluster_request(struct request *req)
 {
+	int ret;
 	sd_dprintf("%s (%p)", op_name(req->op), req);
 
 	if (has_process_work(req->op)) {
+		ret = sys->cdrv->block();
+		if (ret != 0) {
+			sd_eprintf("failed to broadcast block to cluster, %s",
+				   sd_strerror(ret));
+			goto error;
+		}
 		list_add_tail(&req->pending_list,
 			      main_thread_get(pending_block_list));
-		sys->cdrv->block();
+
 	} else {
 		struct vdi_op_message *msg;
 		size_t size;
 
 		msg = prepare_cluster_msg(req, &size);
+		msg->rsp.result = SD_RES_SUCCESS;
+
+		ret = sys->cdrv->notify(msg, size);
+		if (ret != 0) {
+			sd_eprintf("failed to broadcast notify to cluster, %s",
+				   sd_strerror(ret));
+			goto error;
+		}
+
 		list_add_tail(&req->pending_list,
 			      main_thread_get(pending_notify_list));
 
-		msg->rsp.result = SD_RES_SUCCESS;
-		sys->cdrv->notify(msg, size);
-
 		free(msg);
 	}
+
+	return;
+error:
+	req->rp.result = ret;
+	put_request(req);
 }
 
 static inline int get_nodes_nr_from(struct list_head *l)
-- 
1.7.9.5




More information about the sheepdog mailing list