[sheepdog] [PATCH] sheep: call do_cluster_request from the main thread

Wed May 16 08:58:58 CEST 2012

There are two problems with calling do_cluster_request from a work queue:

1) sys->pending_list is expected to only be used from the main thread and
   does not have any locking
2) the ->notify cluster driver metho is expected to be called from the
   main thread

Simplify call do_cluster_request directly from process_request_queue instead
of offloading it to a workqueue to fix this, and document the assumptions
in the code.

Based on an earlier patch from Yunkai Zhang <qiushu.zyk at taobao.com>.

Signed-off-by: Christoph Hellwig <hch at lst.de>

---
 sheep/group.c      |   10 ++++++----
 sheep/sdnet.c      |    8 --------
 sheep/sheep_priv.h |    1 -
 3 files changed, 6 insertions(+), 13 deletions(-)

Index: sheepdog/sheep/group.c
===================================================================

--- sheepdog.orig/sheep/group.c	2012-05-16 07:36:07.555929098 +0200
+++ sheepdog/sheep/group.c	2012-05-16 07:36:40.459928777 +0200
@@ -209,6 +209,12 @@ int get_nr_copies(struct vnode_info *vno
 	return min(vnode_info->nr_zones, sys->nr_copies);
 }
 
+/*
+ * Perform a blocked cluster operation.
+ *
+ * Must run in the main thread as it access unlocked state like
+ * sys->pending_list.
+ */
 static void do_cluster_op(void *arg)
 {
 	struct vdi_op_message *msg = arg;
@@ -228,10 +234,16 @@ static void do_cluster_op(void *arg)
 	msg->rsp.result = ret;
 }
 
-void do_cluster_request(struct work *work)
+/*
+ * Execute a cluster operation by letting the cluster driver send it to all
+ * nodes in the cluster.
+ *
+ * Must run in the main thread as it access unlocked state like
+ * sys->pending_list.
+ */
+static void do_cluster_request(struct request *req)
 {
-	struct request *req = container_of(work, struct request, work);
-	struct sd_req *hdr = (struct sd_req *)&req->rq;
+	struct sd_req *hdr = &req->rq;
 	struct vdi_op_message *msg;
 	size_t size;
 
@@ -641,6 +653,12 @@ static void __sd_notify_done(struct even
 	req_done(req);
 }
 
+/*
+ * Pass on a notification message from the cluster driver.
+ *
+ * Must run in the main thread as it access unlocked state like
+ * sys->pending_list.
+ */
 void sd_notify_handler(struct sd_node *sender, void *msg, size_t msg_len)
 {
 	struct event_struct *cevent;
@@ -1051,8 +1069,16 @@ static void process_request_queue(void)
 				queue_work(sys->io_wqueue, &req->work);
 			else
 				queue_work(sys->gateway_wqueue, &req->work);
-		} else /* (is_cluster_op(req->op) || is_local_op(req->op)) */
+		} else if (is_cluster_op(req->op)) {
+			/*
+			 * Cluster requests are handed off to the cluster driver
+			 * directly from the main thread.  It's the cluster
+			 * drivers job to ensure we avoid blocking on I/O here.
+			 */
+			do_cluster_request(req);
+		} else { /* is_local_op(req->op) */
 			queue_work(sys->io_wqueue, &req->work);
+		}
 	}
 }
 
Index: sheepdog/sheep/sdnet.c
===================================================================
--- sheepdog.orig/sheep/sdnet.c	2012-05-15 11:02:08.399956846 +0200
+++ sheepdog/sheep/sdnet.c	2012-05-16 07:36:25.263928929 +0200
@@ -156,11 +156,6 @@ static void local_op_done(struct work *w
 	req_done(req);
 }
 
-static void cluster_op_done(struct work *work)
-{
-	/* request is forwarded to cpg group */
-}
-
 static void do_local_request(struct work *work)
 {
 	struct request *req = container_of(work, struct request, work);
@@ -323,8 +318,7 @@ static void queue_request(struct request
 		req->work.fn = do_local_request;
 		req->work.done = local_op_done;
 	} else if (is_cluster_op(req->op)) {
-		req->work.fn = do_cluster_request;
-		req->work.done = cluster_op_done;
+		/* directly executed in the main thread */;
 	} else {
 		eprintf("unknown operation %d\n", hdr->opcode);
 		rsp->result = SD_RES_SYSTEM_ERROR;
Index: sheepdog/sheep/sheep_priv.h
===================================================================
--- sheepdog.orig/sheep/sheep_priv.h	2012-05-15 16:06:50.415851224 +0200
+++ sheepdog/sheep/sheep_priv.h	2012-05-16 07:36:25.263928929 +0200
@@ -259,7 +259,6 @@ int forward_write_obj_req(struct request
 
 int read_epoch(uint32_t *epoch, uint64_t *ctime,
 	       struct sd_node *entries, int *nr_entries);
-void do_cluster_request(struct work *work);
 
 int update_epoch_store(uint32_t epoch);
 int update_epoch_log(uint32_t epoch);