[sheepdog] [PATCH v4 1/2] sheep: handle block/unblock/notify error

Tue Jul 9 12:08:11 CEST 2013

group.c uses three broadcast operations: block, unblock and notify.
These broadcast operations are implemented by the cluster drivers.
For example, corosync implements them with cpg_mcast_joined(), while
zookeeper implements them with sequential nodes.
They can fail if the network is unavailable for a while.

However, the current group.c doesn't handle errors from block/unblock/notify
events and just ignores them.

This patch adds a new error, SD_RES_CLUSTER_ERROR, to indicate these errors.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 include/internal_proto.h  |    1 +
 include/sheep.h           |    1 +
 sheep/cluster.h           |   10 +++++++---
 sheep/cluster/corosync.c  |   16 ++++++++--------
 sheep/cluster/local.c     |   10 +++++++---
 sheep/cluster/shepherd.c  |   12 ++++++++----
 sheep/cluster/zookeeper.c |   12 ++++++------
 sheep/group.c             |   39 ++++++++++++++++++++++++++++++++++-----
 8 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index c5cd76d..149f8f8 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -101,6 +101,7 @@
 #define SD_RES_OID_EXIST        0x8E /* Object ID exists already */
 #define SD_RES_AGAIN            0x8F /* Ask to try again */
 #define SD_RES_STALE_OBJ        0x90 /* Object may be stale */
+#define SD_RES_CLUSTER_ERROR    0x91 /* Cluster driver error */
 
 #define SD_FLAG_NOHALT       0x0004 /* Serve the IO rquest even lack of nodes */
 #define SD_FLAG_QUORUM       0x0008 /* Serve the IO rquest as long we are quorate */
diff --git a/include/sheep.h b/include/sheep.h
index 0d3fae4..cd23b75 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -222,6 +222,7 @@ static inline const char *sd_strerror(int err)
 		[SD_RES_OID_EXIST] = "Object ID exists already",
 		[SD_RES_AGAIN] = "Ask to try again",
 		[SD_RES_STALE_OBJ] = "Object may be stale",
+		[SD_RES_CLUSTER_ERROR] = "Cluster driver error",
 	};
 
 	if (descs[err] == NULL) {
diff --git a/sheep/cluster.h b/sheep/cluster.h
index a912985..4851290 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -82,7 +82,7 @@ struct cluster_driver {
 	 * can be read through sd_notify_handler() and totally ordered with
 	 * node change events.
 	 *
-	 * Returns zero on success, -1 on error
+	 * Returns SD_RES_XXX
 	 */
 	int (*notify)(void *msg, size_t msg_len);
 
@@ -92,14 +92,18 @@ struct cluster_driver {
 	 * Once the cluster driver has ensured that events are blocked on all
 	 * nodes it needs to call sd_block_handler() on the node where ->block
 	 * was called.
+	 *
+	 * Returns SD_RES_XXX
 	 */
-	void (*block)(void);
+	int (*block)(void);
 
 	/*
 	 * Unblock events on all nodes, and send a total order message
 	 * to all nodes.
+	 *
+	 * Returns SD_RES_XXX
 	 */
-	void (*unblock)(void *msg, size_t msg_len);
+	int (*unblock)(void *msg, size_t msg_len);
 
 	/* Update the specific node in the driver's private copy of nodes */
 	void (*update_node)(struct sd_node *);
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index bf90209..9b59126 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -198,9 +198,9 @@ retry:
 		goto retry;
 	default:
 		sd_eprintf("failed to send message (%d)", ret);
-		return -1;
+		return SD_RES_CLUSTER_ERROR;
 	}
-	return 0;
+	return SD_RES_SUCCESS;
 }
 
 static inline struct corosync_event *
@@ -704,22 +704,22 @@ static int corosync_leave(void)
 			    NULL, 0);
 }
 
-static void corosync_block(void)
+static int corosync_block(void)
 {
-	send_message(COROSYNC_MSG_TYPE_BLOCK, 0, &this_node, NULL, 0,
+	return send_message(COROSYNC_MSG_TYPE_BLOCK, 0, &this_node, NULL, 0,
 			    NULL, 0);
 }
 
-static void corosync_unblock(void *msg, size_t msg_len)
+static int corosync_unblock(void *msg, size_t msg_len)
 {
-	send_message(COROSYNC_MSG_TYPE_UNBLOCK, 0, &this_node, NULL, 0,
-		     msg, msg_len);
+	return send_message(COROSYNC_MSG_TYPE_UNBLOCK, 0, &this_node, NULL, 0,
+			    msg, msg_len);
 }
 
 static int corosync_notify(void *msg, size_t msg_len)
 {
 	return send_message(COROSYNC_MSG_TYPE_NOTIFY, 0, &this_node,
-			   NULL, 0, msg, msg_len);
+			    NULL, 0, msg, msg_len);
 }
 
 static void corosync_handler(int listen_fd, int events, void *data)
diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c
index 307a69e..e6fa149 100644
--- a/sheep/cluster/local.c
+++ b/sheep/cluster/local.c
@@ -358,19 +358,21 @@ static int local_notify(void *msg, size_t msg_len)
 
 	shm_queue_unlock();
 
-	return 0;
+	return SD_RES_SUCCESS;
 }
 
-static void local_block(void)
+static int local_block(void)
 {
 	shm_queue_lock();
 
 	add_event(EVENT_BLOCK, &this_node, NULL, 0);
 
 	shm_queue_unlock();
+
+	return SD_RES_SUCCESS;
 }
 
-static void local_unblock(void *msg, size_t msg_len)
+static int local_unblock(void *msg, size_t msg_len)
 {
 	struct local_event *ev;
 
@@ -384,6 +386,8 @@ static void local_unblock(void *msg, size_t msg_len)
 	add_event(EVENT_NOTIFY, &this_node, msg, msg_len);
 
 	shm_queue_unlock();
+
+	return SD_RES_SUCCESS;
 }
 
 /* Returns true if an event is processed */
diff --git a/sheep/cluster/shepherd.c b/sheep/cluster/shepherd.c
index fba329c..df8737f 100644
--- a/sheep/cluster/shepherd.c
+++ b/sheep/cluster/shepherd.c
@@ -638,10 +638,11 @@ static int do_shepherd_notify(bool unblock, void *msg, size_t msg_len)
 
 static int shepherd_notify(void *msg, size_t msg_len)
 {
-	return do_shepherd_notify(false, msg, msg_len);
+	return do_shepherd_notify(false, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
-static void shepherd_block(void)
+static int shepherd_block(void)
 {
 	int ret;
 	struct sph_msg msg;
@@ -654,11 +655,14 @@ static void shepherd_block(void)
 		sd_eprintf("xwrite() failed: %m");
 		exit(1);
 	}
+
+	return SD_RES_SUCCESS;
 }
 
-static void shepherd_unblock(void *msg, size_t msg_len)
+static int shepherd_unblock(void *msg, size_t msg_len)
 {
-	do_shepherd_notify(true, msg, msg_len);
+	return do_shepherd_notify(true, msg, msg_len) == 0 ?
+		SD_RES_SUCCESS : SD_RES_CLUSTER_ERROR;
 }
 
 /* FIXME: shepherd server also has to udpate node information */
diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c
index 0ac1677..ac6347a 100644
--- a/sheep/cluster/zookeeper.c
+++ b/sheep/cluster/zookeeper.c
@@ -529,10 +529,10 @@ static int add_event(enum zk_event_type type, struct zk_node *znode, void *buf,
 		memcpy(ev.buf, buf, buf_len);
 	rc = zk_queue_push(&ev);
 	if (rc == ZOK)
-		return 0;
+		return SD_RES_SUCCESS;
 	else {
 		sd_eprintf("failed, type: %d, %s", type, zerror(rc));
-		return -1;
+		return SD_RES_CLUSTER_ERROR;
 	}
 }
 
@@ -826,14 +826,14 @@ static int zk_notify(void *msg, size_t msg_len)
 	return add_event(EVENT_NOTIFY, &this_node, msg, msg_len);
 }
 
-static void zk_block(void)
+static int zk_block(void)
 {
-	add_event(EVENT_BLOCK, &this_node, NULL, 0);
+	return add_event(EVENT_BLOCK, &this_node, NULL, 0);
 }
 
-static void zk_unblock(void *msg, size_t msg_len)
+static int zk_unblock(void *msg, size_t msg_len)
 {
-	add_event(EVENT_UNBLOCK, &this_node, msg, msg_len);
+	return add_event(EVENT_UNBLOCK, &this_node, msg, msg_len);
 }
 
 static void zk_handle_join_request(struct zk_event *ev)
diff --git a/sheep/group.c b/sheep/group.c
index 6e01a8d..546e4ae 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -232,11 +232,23 @@ static void cluster_op_done(struct work *work)
 	struct request *req = container_of(work, struct request, work);
 	struct vdi_op_message *msg;
 	size_t size;
+	int ret;
 
 	sd_dprintf("%s (%p)", op_name(req->op), req);
 
 	msg = prepare_cluster_msg(req, &size);
-	sys->cdrv->unblock(msg, size);
+
+	ret = sys->cdrv->unblock(msg, size);
+	if (ret != SD_RES_SUCCESS) {
+		/*
+		 * Failed to unblock, shoot myself to let other sheep
+		 * unblock the event.
+		 * FIXME: handle it gracefully.
+		 */
+		sd_printf(SDOG_EMERG, "Failed to unblock, %s, exiting.",
+			  sd_strerror(ret));
+		exit(1);
+	}
 
 	free(msg);
 }
@@ -280,25 +292,42 @@ bool sd_block_handler(const struct sd_node *sender)
  */
 void queue_cluster_request(struct request *req)
 {
+	int ret;
 	sd_dprintf("%s (%p)", op_name(req->op), req);
 
 	if (has_process_work(req->op)) {
+		ret = sys->cdrv->block();
+		if (ret != SD_RES_SUCCESS) {
+			sd_eprintf("failed to broadcast block to cluster, %s",
+				   sd_strerror(ret));
+			goto error;
+		}
 		list_add_tail(&req->pending_list,
 			      main_thread_get(pending_block_list));
-		sys->cdrv->block();
 	} else {
 		struct vdi_op_message *msg;
 		size_t size;
 
 		msg = prepare_cluster_msg(req, &size);
+		msg->rsp.result = SD_RES_SUCCESS;
+
+		ret = sys->cdrv->notify(msg, size);
+		if (ret != SD_RES_SUCCESS) {
+			sd_eprintf("failed to broadcast notify to cluster, %s",
+				   sd_strerror(ret));
+			goto error;
+		}
+
 		list_add_tail(&req->pending_list,
 			      main_thread_get(pending_notify_list));
 
-		msg->rsp.result = SD_RES_SUCCESS;
-		sys->cdrv->notify(msg, size);
-
 		free(msg);
 	}
+
+	return;
+error:
+	req->rp.result = ret;
+	put_request(req);
 }
 
 static inline int get_nodes_nr_from(struct list_head *l)
-- 
1.7.9.5