[Sheepdog] [PATCH] retry indirect I/O requests if they fail due to epoch mismatch

FUJITA Tomonori fujita.tomonori at lab.ntt.co.jp
Fri May 7 07:39:59 CEST 2010


If an indirect I/O request fails due to an epoch mismatch, we need to
update its epoch and node list and then retry it.

Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
 collie/group.c |   10 ++++++-
 collie/net.c   |   86 +++++++++++++++++++++++++++++++++-----------------------
 2 files changed, 60 insertions(+), 36 deletions(-)

diff --git a/collie/group.c b/collie/group.c
index 9c9fb0f..dac2e5c 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -1352,6 +1352,7 @@ void start_cpg_event_work(void)
 {
 	struct cpg_event *cevent, *n;
 	LIST_HEAD(failed_req_list);
+	int retry;
 
 	if (list_empty(&sys->cpg_event_siblings))
 		vprintf(SDOG_ERR "bug\n");
@@ -1382,6 +1383,9 @@ void start_cpg_event_work(void)
 		return;
 	}
 
+do_retry:
+	retry = 0;
+
 	list_for_each_entry_safe(cevent, n, &sys->cpg_event_siblings, cpg_event_list) {
 		struct request *req = container_of(cevent, struct request, cev);
 
@@ -1416,11 +1420,15 @@ void start_cpg_event_work(void)
 	while (!list_empty(&failed_req_list)) {
 		struct request *req = list_first_entry(&failed_req_list,
 						       struct request, r_wlist);
-
 		list_del(&req->r_wlist);
 		req->done(req);
+
+		retry = 1;
 	}
 
+	if (retry)
+		goto do_retry;
+
 	if (cpg_event_running() || cpg_event_suspended() ||
 	    list_empty(&sys->cpg_event_siblings) || sys->nr_outstanding_io)
 		return;
diff --git a/collie/net.c b/collie/net.c
index d3e6ce5..f47a5f6 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -51,41 +51,6 @@ void resume_pending_requests(void)
 		start_cpg_event_work();
 }
 
-static void __done(struct work *work, int idx)
-{
-	struct request *req = container_of(work, struct request, work);
-	struct sd_req *hdr = (struct sd_req *)&req->rq;
-
-	switch (hdr->opcode) {
-	case SD_OP_NEW_VDI:
-	case SD_OP_DEL_VDI:
-	case SD_OP_LOCK_VDI:
-	case SD_OP_RELEASE_VDI:
-	case SD_OP_GET_VDI_INFO:
-	case SD_OP_MAKE_FS:
-	case SD_OP_SHUTDOWN:
-		/* request is forwarded to cpg group */
-		return;
-	}
-
-	if (is_io_request(hdr->opcode)) {
-		list_del(&req->r_wlist);
-
-		sys->nr_outstanding_io--;
-		/*
-		 * TODO: if the request failed due to epoch unmatch,
-		 * we should retry here (adds this request to the tail
-		 * of sys->cpg_event_siblings.
-		 */
-
-		resume_pending_requests();
-		resume_recovery_work();
-	}
-
-	req->done(req);
-}
-
-
 static int is_access_local(struct sheepdog_node_list_entry *e, int nr_nodes,
 			   uint64_t oid, int copies)
 {
@@ -127,6 +92,57 @@ static void setup_access_to_local_objects(struct request *req)
 		req->local_oid[1] = hdr->cow_oid;
 }
 
+static void __done(struct work *work, int idx)
+{
+	struct request *req = container_of(work, struct request, work);
+	struct sd_req *hdr = (struct sd_req *)&req->rq;
+	int again = 0;
+
+	switch (hdr->opcode) {
+	case SD_OP_NEW_VDI:
+	case SD_OP_DEL_VDI:
+	case SD_OP_LOCK_VDI:
+	case SD_OP_RELEASE_VDI:
+	case SD_OP_GET_VDI_INFO:
+	case SD_OP_MAKE_FS:
+	case SD_OP_SHUTDOWN:
+		/* request is forwarded to cpg group */
+		return;
+	}
+
+	if (is_io_request(hdr->opcode)) {
+		struct cpg_event *cevent = &req->cev;
+
+		list_del(&req->r_wlist);
+
+		sys->nr_outstanding_io--;
+		/*
+		 * If an indirect request failed due to an epoch mismatch,
+		 * refresh its epoch and node list and requeue it at the
+		 * tail of sys->cpg_event_siblings so that it is retried.
+		 */
+
+		if (!(req->rq.flags & SD_FLAG_CMD_DIRECT) &&
+		    (req->rp.result == SD_RES_OLD_NODE_VER ||
+		     req->rp.result == SD_RES_NEW_NODE_VER)) {
+
+
+			req->rq.epoch = sys->epoch;
+			req->nr_nodes = setup_ordered_sd_node_list(req);
+			setup_access_to_local_objects(req);
+
+			list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
+			again = 1;
+		}
+
+		resume_pending_requests();
+		resume_recovery_work();
+	}
+
+	if (!again)
+		req->done(req);
+}
+
 static void queue_request(struct request *req)
 {
 	struct cpg_event *cevent = &req->cev;
-- 
1.6.5




More information about the sheepdog mailing list