[sheepdog] [PATCH v4 2/2] cluster driver: handle pending block/notify event during reconnect

Tue Jul 9 12:08:12 CEST 2013

The current reconnection implementation doesn't handle pending block/notify
events.

It is easy to handle a notify event: call sd_notify_handler() manually
to finish it.

However, handling a block event is a little more complex,
because a block event needs 4 steps.
1. in queue_cluster_request(), send block event by sys->cdrv->block(), and
  add to pending_block_list.
2. in sd_block_handler(), queue the event to work queue of 'block' thread.
3. in cluster_op_done(), send unblock event by sys->cdrv->unblock().
4. in sd_notify_handler(), remove it from pending_block_list.

Steps 1 and 3 contain broadcast operations,
so we have to know which step a pending block event has completed.

If step 1 has been done, we can simply re-queue it. (Any block events sent
by this node have been removed due to the leave event.)
If step 2 has been done, the event is being handled by another thread. We have
to mark it as dropped so that it is dropped when cluster_op_done() is called later.
If step 3 has been done, we should call sd_notify_handler() manually to finish
it.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 sheep/group.c      |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 sheep/sheep_priv.h |    8 ++++++
 2 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index 546e4ae..370c625 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -234,6 +234,9 @@ static void cluster_op_done(struct work *work)
 	size_t size;
 	int ret;
 
+	if (req->status == REQUEST_DROPPED)
+		goto drop;
+
 	sd_dprintf("%s (%p)", op_name(req->op), req);
 
 	msg = prepare_cluster_msg(req, &size);
@@ -251,6 +254,13 @@ static void cluster_op_done(struct work *work)
 	}
 
 	free(msg);
+	req->status = REQUEST_DONE;
+	return;
+drop:
+	list_del(&req->pending_list);
+	req->rp.result = SD_RES_CLUSTER_ERROR;
+	put_request(req);
+	cluster_op_running = false;
 }
 
 /*
@@ -280,6 +290,7 @@ bool sd_block_handler(const struct sd_node *sender)
 	req->work.done = cluster_op_done;
 
 	queue_work(sys->block_wqueue, &req->work);
+	req->status = REQUEST_QUEUED;
 	return true;
 }
 
@@ -323,7 +334,7 @@ void queue_cluster_request(struct request *req)
 
 		free(msg);
 	}
-
+	req->status = REQUEST_INIT;
 	return;
 error:
 	req->rp.result = ret;
@@ -921,6 +932,71 @@ static int send_join_request(struct sd_node *ent)
 	return ret;
 }
 
+static void requeue_cluster_request(void)
+{
+	struct request *req, *p;
+	struct vdi_op_message *msg;
+	size_t size;
+
+	list_for_each_entry_safe(req, p, main_thread_get(pending_notify_list),
+				 pending_list) {
+		/*
+		 * ->notify() was called and succeeded, but this node's
+		 * session then timed out, so sd_notify_handler() was
+		 * never called from the notify event handler in the
+		 * cluster driver. Call sd_notify_handler() manually to
+		 * finish the request.
+		 */
+		sd_dprintf("finish pending notify request, op: %s",
+			   op_name(req->op));
+		msg = prepare_cluster_msg(req, &size);
+		sd_notify_handler(&sys->this_node, msg, size);
+		free(msg);
+	}
+
+	list_for_each_entry_safe(req, p, main_thread_get(pending_block_list),
+				 pending_list) {
+		switch (req->status) {
+		case REQUEST_INIT:
+			/* this request has never been executed; re-queue it */
+			sd_dprintf("requeue a block request, op: %s",
+				   op_name(req->op));
+			list_del(&req->pending_list);
+			queue_cluster_request(req);
+			break;
+		case REQUEST_QUEUED:
+			/*
+			 * This request is being handled by the 'block' thread
+			 * and ->unblock() hasn't been called yet. We can't
+			 * call ->unblock() later because the other sheep have
+			 * already unblocked themselves due to the cluster
+			 * driver session timeout. Mark it as dropped to stop
+			 * cluster_op_done() from calling ->unblock().
+			 */
+			sd_dprintf("drop pending block request, op: %s",
+				   op_name(req->op));
+			req->status = REQUEST_DROPPED;
+			break;
+		case REQUEST_DONE:
+			/*
+			 * ->unblock() was called and succeeded, but this
+			 * node's session then timed out, so
+			 * sd_notify_handler() was never called from the
+			 * unblock event handler in the cluster driver. Call
+			 * sd_notify_handler() manually to finish the request.
+			 */
+			sd_dprintf("finish pending block request, op: %s",
+				   op_name(req->op));
+			msg = prepare_cluster_msg(req, &size);
+			sd_notify_handler(&sys->this_node, msg, size);
+			free(msg);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 int sd_reconnect_handler(void)
 {
 	sys->status = SD_STATUS_WAIT_FOR_JOIN;
@@ -929,7 +1005,7 @@ int sd_reconnect_handler(void)
 		return -1;
 	if (send_join_request(&sys->this_node) != 0)
 		return -1;
-
+	requeue_cluster_request();
 	return 0;
 }
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 382d246..1f002bf 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -55,6 +55,13 @@ struct client_info {
 	int refcnt;
 };
 
+enum REQUST_STATUS {
+	REQUEST_INIT,		/* set at the end of queue_cluster_request() */
+	REQUEST_QUEUED,		/* set by sd_block_handler() after queue_work() */
+	REQUEST_DONE,		/* set by cluster_op_done() when not dropped */
+	REQUEST_DROPPED		/* set by requeue_cluster_request() */
+};
+
 struct request {
 	struct sd_req rq;
 	struct sd_rsp rp;
@@ -77,6 +84,7 @@ struct request {
 	struct vnode_info *vinfo;
 
 	struct work work;
+	enum REQUST_STATUS status;
 };
 
 struct system_info {
-- 
1.7.9.5