Subject: [sheepdog] [PATCH 2/2] sheep: remove the request_queue
From: Christoph Hellwig <hch at infradead.org>
Date: Tue May 29 11:42:55 CEST 2012
Offload I/O requests to the worker threads as soon as they arrive in
queue_request, or when walking one of the lists of delayed requests.
Also move the code related to queueing I/O requests from group.c and
recovery.c into sdnet.c to have it in a single place.
Signed-off-by: Christoph Hellwig <hch at lst.de>
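---

To summarize the new flow: instead of parking requests on sys->request_queue
and letting the main loop move them to the worker queues later, every path
now ends in process_io_request(), which dispatches to a work queue right
away. A condensed sketch of the dispatch logic, lifted from the sdnet.c
hunk below (surrounding call paths elided):

	static void process_io_request(struct request *req)
	{
		/* the request sits on the outstanding list while a
		 * worker thread owns it */
		list_add_tail(&req->request_list, &sys->outstanding_req_list);

		if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL) {
			/* local I/O goes straight to the I/O workers */
			queue_work(sys->io_wqueue, &req->work);
		} else {
			/* only the gateway path fixes data consistency */
			if (need_consistency_check(req))
				set_consistency_check(req);
			queue_work(sys->gateway_wqueue, &req->work);
		}
	}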
diff --git a/sheep/group.c b/sheep/group.c
index e0642b0..75c4fec 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -859,70 +859,6 @@ int is_access_to_busy_objects(uint64_t oid)
return 0;
}
-static int need_consistency_check(struct request *req)
-{
- struct sd_req *hdr = &req->rq;
-
- if (hdr->flags & SD_FLAG_CMD_IO_LOCAL)
- /* only gateway fixes data consistency */
- return 0;
-
- if (hdr->opcode != SD_OP_READ_OBJ)
- /* consistency is fixed when clients read data for the
- * first time */
- return 0;
-
- if (hdr->flags & SD_FLAG_CMD_WEAK_CONSISTENCY)
- return 0;
-
- if (is_vdi_obj(hdr->obj.oid))
- /* only check consistency for data objects */
- return 0;
-
- if (sys->enable_write_cache && object_is_cached(hdr->obj.oid))
- /* we don't check consistency for cached objects */
- return 0;
-
- return 1;
-}
-
-static inline void set_consistency_check(struct request *req)
-{
- uint32_t vdi_id = oid_to_vid(req->rq.obj.oid);
- uint32_t idx = data_oid_to_idx(req->rq.obj.oid);
- struct data_object_bmap *bmap;
-
- req->check_consistency = 1;
- list_for_each_entry(bmap, &sys->consistent_obj_list, list) {
- if (bmap->vdi_id == vdi_id) {
- if (test_bit(idx, bmap->dobjs))
- req->check_consistency = 0;
- break;
- }
- }
-}
-
-/* can be called only by the main process */
-void process_request_event_queues(void)
-{
- struct request *req, *n;
-
- list_for_each_entry_safe(req, n, &sys->request_queue, request_list) {
- list_del(&req->request_list);
-
- list_add_tail(&req->request_list,
- &sys->outstanding_req_list);
-
- if (need_consistency_check(req))
- set_consistency_check(req);
-
- if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL)
- queue_work(sys->io_wqueue, &req->work);
- else
- queue_work(sys->gateway_wqueue, &req->work);
- }
-}
-
void sd_join_handler(struct sd_node *joined, struct sd_node *members,
size_t nr_members, enum cluster_join_result result,
void *opaque)
@@ -1103,7 +1039,6 @@ int create_cluster(int port, int64_t zone, int nr_vnodes)
INIT_LIST_HEAD(&sys->consistent_obj_list);
INIT_LIST_HEAD(&sys->blocking_conn_list);
- INIT_LIST_HEAD(&sys->request_queue);
INIT_LIST_HEAD(&sys->wait_rw_queue);
INIT_LIST_HEAD(&sys->wait_obj_queue);
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 9ee876f..6fd69a4 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -378,26 +378,6 @@ int is_recoverying_oid(uint64_t oid)
return 0;
}
-static void resume_wait_recovery_requests(void)
-{
- struct request *req, *t;
-
- list_for_each_entry_safe(req, t, &sys->wait_rw_queue,
- request_list) {
- dprintf("resume wait oid %" PRIx64 "\n", req->local_oid);
- if (req->rp.result == SD_RES_OBJ_RECOVERING)
- list_move_tail(&req->request_list, &sys->request_queue);
- }
-
- process_request_event_queues();
-}
-
-static void flush_wait_obj_requests(void)
-{
- list_splice_tail_init(&sys->wait_obj_queue, &sys->request_queue);
- process_request_event_queues();
-}
-
static void free_recovery_work(struct recovery_work *rw)
{
put_vnode_info(rw->cur_vnodes);
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index bf27225..5bda99a 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -55,6 +55,45 @@ static void setup_access_to_local_objects(struct request *req)
req->local_cow_oid = hdr->obj.cow_oid;
}
+static int need_consistency_check(struct request *req)
+{
+ struct sd_req *hdr = &req->rq;
+
+ if (hdr->opcode != SD_OP_READ_OBJ)
+ /* consistency is fixed when clients read data for the
+ * first time */
+ return 0;
+
+ if (hdr->flags & SD_FLAG_CMD_WEAK_CONSISTENCY)
+ return 0;
+
+ if (is_vdi_obj(hdr->obj.oid))
+ /* only check consistency for data objects */
+ return 0;
+
+ if (sys->enable_write_cache && object_is_cached(hdr->obj.oid))
+ /* we don't check consistency for cached objects */
+ return 0;
+
+ return 1;
+}
+
+static inline void set_consistency_check(struct request *req)
+{
+ uint32_t vdi_id = oid_to_vid(req->rq.obj.oid);
+ uint32_t idx = data_oid_to_idx(req->rq.obj.oid);
+ struct data_object_bmap *bmap;
+
+ req->check_consistency = 1;
+ list_for_each_entry(bmap, &sys->consistent_obj_list, list) {
+ if (bmap->vdi_id == vdi_id) {
+ if (test_bit(idx, bmap->dobjs))
+ req->check_consistency = 0;
+ break;
+ }
+ }
+}
+
static void check_object_consistency(struct sd_req *hdr)
{
uint32_t vdi_id = oid_to_vid(hdr->obj.oid);
@@ -90,6 +129,19 @@ static void check_object_consistency(struct sd_req *hdr)
}
}
+static void process_io_request(struct request *req)
+{
+ list_add_tail(&req->request_list, &sys->outstanding_req_list);
+
+ if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL) {
+ queue_work(sys->io_wqueue, &req->work);
+ } else {
+ if (need_consistency_check(req))
+ set_consistency_check(req);
+ queue_work(sys->gateway_wqueue, &req->work);
+ }
+}
+
static void io_op_done(struct work *work)
{
struct request *req = container_of(work, struct request, work);
@@ -142,7 +194,7 @@ retry:
put_vnode_info(req->vnodes);
req->vnodes = get_vnode_info();
setup_access_to_local_objects(req);
- list_add_tail(&req->request_list, &sys->request_queue);
+ process_io_request(req);
resume_pending_requests();
resume_recovery_work();
@@ -251,11 +303,8 @@ void resume_pending_requests(void)
if (check_request(req) < 0)
continue;
- list_add_tail(&req->request_list, &sys->request_queue);
+ process_io_request(req);
}
-
- if (!list_empty(&sys->request_queue))
- process_request_event_queues();
}
void resume_wait_epoch_requests(void)
@@ -274,13 +323,27 @@ void resume_wait_epoch_requests(void)
setup_access_to_local_objects(req);
/* peer retries the request locally when its epoch changes. */
case SD_RES_NEW_NODE_VER:
- list_move_tail(&req->request_list, &sys->request_queue);
+ list_del(&req->request_list);
+ process_io_request(req);
break;
default:
break;
}
}
- process_request_event_queues();
+}
+
+void resume_wait_recovery_requests(void)
+{
+ struct request *req, *t;
+
+ list_for_each_entry_safe(req, t, &sys->wait_rw_queue,
+ request_list) {
+ dprintf("resume wait oid %" PRIx64 "\n", req->local_oid);
+ if (req->rp.result == SD_RES_OBJ_RECOVERING) {
+ list_del(&req->request_list);
+ process_io_request(req);
+ }
+ }
}
void resume_wait_obj_requests(uint64_t oid)
@@ -293,10 +356,20 @@ void resume_wait_obj_requests(uint64_t oid)
* recovered, notify the pending request. */
if (req->local_oid == oid) {
dprintf("retry %" PRIx64 "\n", req->local_oid);
- list_move_tail(&req->request_list, &sys->request_queue);
+ list_del(&req->request_list);
+ process_io_request(req);
}
}
- process_request_event_queues();
+}
+
+void flush_wait_obj_requests(void)
+{
+ struct request *req, *n;
+
+ list_for_each_entry_safe(req, n, &sys->wait_obj_queue, request_list) {
+ list_del(&req->request_list);
+ process_io_request(req);
+ }
}
static void queue_io_request(struct request *req)
@@ -313,8 +386,7 @@ static void queue_io_request(struct request *req)
if (check_request(req) < 0)
return;
- list_add_tail(&req->request_list, &sys->request_queue);
- process_request_event_queues();
+ process_io_request(req);
}
static void queue_local_request(struct request *req)
@@ -365,13 +437,14 @@ static void queue_request(struct request *req)
}
/*
- * we set epoch for non direct requests here. Note that we
- * can't access to sys->epoch after calling
- * process_request_event_queues(that is, passing requests to work
- * threads).
+ * we set epoch for non direct requests here. Note that we need to
+ * sample sys->epoch before passing requests to worker threads as
+ * it can change anytime we return to processing membership change
+ * events.
*/
if (!(hdr->flags & SD_FLAG_CMD_IO_LOCAL))
hdr->epoch = sys->epoch;
+
/*
* force operations shouldn't access req->vnodes in their
* process_work() and process_main() because they can be
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 96b6698..dba9541 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -128,7 +128,6 @@ struct cluster_info {
int nr_copies;
- struct list_head request_queue;
struct list_head wait_rw_queue;
struct list_head wait_obj_queue;
int nr_outstanding_reqs;
@@ -250,11 +249,12 @@ int is_access_to_busy_objects(uint64_t oid);
void resume_pending_requests(void);
void resume_wait_epoch_requests(void);
void resume_wait_obj_requests(uint64_t oid);
+void resume_wait_recovery_requests(void);
+void flush_wait_obj_requests(void);
int create_cluster(int port, int64_t zone, int nr_vnodes);
int leave_cluster(void);
-void process_request_event_queues(void);
void queue_cluster_request(struct request *req);
void do_io_request(struct work *work);
void do_gateway_request(struct work *work);
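One ordering detail the updated comment in queue_request() calls out:
sys->epoch must be sampled on the main thread, before the request is handed
to a worker, because the epoch can advance whenever the main loop processes
a membership change event. A rough sketch of the resulting shape of
queue_request() (the op-type dispatch in between is elided and assumed, not
spelled out in the hunks above):

	static void queue_request(struct request *req)
	{
		struct sd_req *hdr = &req->rq;
		...
		/* sample the epoch before the hand-off; once a worker owns
		 * the request, the main loop is free to process membership
		 * changes and bump sys->epoch */
		if (!(hdr->flags & SD_FLAG_CMD_IO_LOCAL))
			hdr->epoch = sys->epoch;
		...
		queue_io_request(req);	/* ends in process_io_request() */
	}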