[sheepdog] [PATCH 4/4] cluster driver: handle pending block/notify event during reconnect
Kai Zhang
kyle at zelin.io
Sat Jul 6 03:59:08 CEST 2013
The current implementation of reconnection doesn't handle pending block/notify
events.
It is easy to handle a notify event by sending it again.
However, it is a little more complex for a block event,
because a block event needs 4 steps:
1. in queue_cluster_request(), send the block event by sys->cdrv->block(), and
add the request to pending_block_list.
2. in sd_block_handler(), queue the event to the work queue of the 'block' thread.
3. in cluster_op_done(), send unblock event by sys->cdrv->unblock().
4. in sd_notify_handler(), remove it from pending_block_list.
Steps 1 and 3 contain broadcast operations,
so we have to know which step has been completed for a pending block event.
If step 1 has been done, we can simply re-queue it. (Any block events sent
by this node have been removed due to the leave event.)
If step 2 has been done, the event is being handled by another thread. We have to mark
it as 'drop' so that it will be dropped when cluster_op_done() is called later.
If step 3 has been done, we should call sd_notify_handler() manually to finish
it.
Signed-off-by: Kai Zhang <kyle at zelin.io>
---
sheep/group.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++--
sheep/sheep_priv.h | 8 ++++++
2 files changed, 75 insertions(+), 2 deletions(-)
diff --git a/sheep/group.c b/sheep/group.c
index 2fa4091..1a549de 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -251,6 +251,9 @@ static void cluster_op_done(struct work *work)
struct vdi_op_message *msg;
size_t size;
+ if (req->status == REQUEST_DROPPED)
+ goto drop;
+
sd_dprintf("%s (%p)", op_name(req->op), req);
msg = prepare_cluster_msg(req, &size);
@@ -266,6 +269,13 @@ static void cluster_op_done(struct work *work)
}
free(msg);
+ req->status = REQUEST_DONE;
+ return;
+drop:
+ list_del(&req->pending_list);
+ req->rp.result = SD_RES_CLUSTER_ERROR;
+ put_request(req);
+ cluster_op_running = false;
}
/*
@@ -295,6 +305,7 @@ bool sd_block_handler(const struct sd_node *sender)
req->work.done = cluster_op_done;
queue_work(sys->block_wqueue, &req->work);
+ req->status = REQUEST_QUEUED;
return true;
}
@@ -329,7 +340,7 @@ void queue_cluster_request(struct request *req)
free(msg);
}
-
+ req->status = REQUEST_INIT;
return;
error:
req->rp.result = SD_RES_CLUSTER_ERROR;
@@ -927,6 +938,60 @@ static int send_join_request(struct sd_node *ent)
return ret;
}
+static void requeue_cluster_request(void)
+{
+ struct request *req, *p;
+ struct vdi_op_message *msg;
+ size_t size;
+
+ list_for_each_entry_safe(req, p, main_thread_get(pending_notify_list),
+ pending_list) {
+ sd_dprintf("found an pending notify request, op: %s",
+ op_name(req->op));
+ /*
+ * notify has been sent but sd_notify_handler is never called,
+ * re-queue it
+ */
+ list_del(&req->pending_list);
+ queue_cluster_request(req);
+ }
+
+ list_for_each_entry_safe(req, p, main_thread_get(pending_block_list),
+ pending_list) {
+ switch (req->status) {
+ case REQUEST_INIT:
+ /* this request has never been executed, re-queue it */
+ sd_dprintf("requeue a block request, op: %s",
+ op_name(req->op));
+ list_del(&req->pending_list);
+ queue_cluster_request(req);
+ break;
+ case REQUEST_QUEUED:
+ /*
+ * This request is handling by the 'block' thread.
+ * Don't send unblock event, because other sheep has
+ * unblocked it. Drop it when cluster_op_done() is
+ * called.
+ */
+ sd_dprintf("drop pending block request, op: %s",
+ op_name(req->op));
+ req->status = REQUEST_DROPPED;
+ break;
+ case REQUEST_DONE:
+ /*
+ * Unblock has been sent but sd_notify_handler is never
+ * called, call sd_notify_handler to finish it.
+ */
+ sd_dprintf("finish pending block request, op: %s",
+ op_name(req->op));
+ msg = prepare_cluster_msg(req, &size);
+ sd_notify_handler(&sys->this_node, msg, size);
+ free(msg);
+ break;
+ }
+ }
+}
+
int sd_reconnect_handler(void)
{
sys->status = SD_STATUS_WAIT_FOR_JOIN;
@@ -935,7 +1000,7 @@ int sd_reconnect_handler(void)
return -1;
if (send_join_request(&sys->this_node) != 0)
return -1;
-
+ requeue_cluster_request();
return 0;
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index c406534..d2b5364 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -39,6 +39,13 @@ struct client_info {
int refcnt;
};
+enum REQUST_STATUS {
+ REQUEST_INIT,
+ REQUEST_QUEUED,
+ REQUEST_DONE,
+ REQUEST_DROPPED
+};
+
struct request {
struct sd_req rq;
struct sd_rsp rp;
@@ -61,6 +68,7 @@ struct request {
struct vnode_info *vinfo;
struct work work;
+ int status;
};
struct system_info {
--
1.7.9.5
More information about the sheepdog
mailing list