[Sheepdog] [PATCH] retry indirect I/O requests if they fail due to epoch mismatch
FUJITA Tomonori
fujita.tomonori at lab.ntt.co.jp
Fri May 7 07:39:59 CEST 2010
If indirect I/O requests fail due to an epoch mismatch, we need to
update their epoch and node list and retry them.
Signed-off-by: FUJITA Tomonori <fujita.tomonori at lab.ntt.co.jp>
---
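Note (not part of the patch): below is a minimal standalone sketch of
the retry idea described above. It uses a toy request type instead of
sheepdog's struct request, sys->epoch and the cpg_event_siblings list;
names such as toy_request, complete_request and current_epoch are made
up purely for illustration.

#include <stdio.h>

enum result { RES_SUCCESS, RES_OLD_NODE_VER, RES_NEW_NODE_VER };

struct toy_request {
	int epoch;              /* epoch the request was issued with */
	enum result result;     /* completion status from the I/O path */
};

static int current_epoch = 3;   /* stand-in for sys->epoch */

/* Return 1 if the request must be requeued for retry, 0 if it is done. */
static int complete_request(struct toy_request *req)
{
	if (req->result == RES_OLD_NODE_VER ||
	    req->result == RES_NEW_NODE_VER) {
		req->epoch = current_epoch;   /* refresh the stale epoch */
		req->result = RES_SUCCESS;    /* assume the retried I/O succeeds */
		return 1;                     /* caller puts it back on the queue */
	}
	return 0;                             /* complete normally */
}

int main(void)
{
	struct toy_request req = { .epoch = 2, .result = RES_OLD_NODE_VER };

	while (complete_request(&req))
		printf("epoch mismatch, retrying with epoch %d\n", req.epoch);

	printf("request done at epoch %d\n", req.epoch);
	return 0;
}

Compiled and run, this prints one retry line and then completes,
mirroring a single round of the requeue-with-new-epoch path that the
patched __done()/start_cpg_event_work() perform on the real queue.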
collie/group.c | 10 ++++++-
collie/net.c | 86 +++++++++++++++++++++++++++++++++-----------------------
2 files changed, 60 insertions(+), 36 deletions(-)
diff --git a/collie/group.c b/collie/group.c
index 9c9fb0f..dac2e5c 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -1352,6 +1352,7 @@ void start_cpg_event_work(void)
{
struct cpg_event *cevent, *n;
LIST_HEAD(failed_req_list);
+ int retry;
if (list_empty(&sys->cpg_event_siblings))
vprintf(SDOG_ERR "bug\n");
@@ -1382,6 +1383,9 @@ void start_cpg_event_work(void)
return;
}
+do_retry:
+ retry = 0;
+
list_for_each_entry_safe(cevent, n, &sys->cpg_event_siblings, cpg_event_list) {
struct request *req = container_of(cevent, struct request, cev);
@@ -1416,11 +1420,15 @@ void start_cpg_event_work(void)
while (!list_empty(&failed_req_list)) {
struct request *req = list_first_entry(&failed_req_list,
struct request, r_wlist);
-
list_del(&req->r_wlist);
req->done(req);
+
+ retry = 1;
}
+ if (retry)
+ goto do_retry;
+
if (cpg_event_running() || cpg_event_suspended() ||
list_empty(&sys->cpg_event_siblings) || sys->nr_outstanding_io)
return;
diff --git a/collie/net.c b/collie/net.c
index d3e6ce5..f47a5f6 100644
--- a/collie/net.c
+++ b/collie/net.c
@@ -51,41 +51,6 @@ void resume_pending_requests(void)
start_cpg_event_work();
}
-static void __done(struct work *work, int idx)
-{
- struct request *req = container_of(work, struct request, work);
- struct sd_req *hdr = (struct sd_req *)&req->rq;
-
- switch (hdr->opcode) {
- case SD_OP_NEW_VDI:
- case SD_OP_DEL_VDI:
- case SD_OP_LOCK_VDI:
- case SD_OP_RELEASE_VDI:
- case SD_OP_GET_VDI_INFO:
- case SD_OP_MAKE_FS:
- case SD_OP_SHUTDOWN:
- /* request is forwarded to cpg group */
- return;
- }
-
- if (is_io_request(hdr->opcode)) {
- list_del(&req->r_wlist);
-
- sys->nr_outstanding_io--;
- /*
- * TODO: if the request failed due to epoch unmatch,
- * we should retry here (adds this request to the tail
- * of sys->cpg_event_siblings.
- */
-
- resume_pending_requests();
- resume_recovery_work();
- }
-
- req->done(req);
-}
-
-
static int is_access_local(struct sheepdog_node_list_entry *e, int nr_nodes,
uint64_t oid, int copies)
{
@@ -127,6 +92,57 @@ static void setup_access_to_local_objects(struct request *req)
req->local_oid[1] = hdr->cow_oid;
}
+static void __done(struct work *work, int idx)
+{
+ struct request *req = container_of(work, struct request, work);
+ struct sd_req *hdr = (struct sd_req *)&req->rq;
+ int again = 0;
+
+ switch (hdr->opcode) {
+ case SD_OP_NEW_VDI:
+ case SD_OP_DEL_VDI:
+ case SD_OP_LOCK_VDI:
+ case SD_OP_RELEASE_VDI:
+ case SD_OP_GET_VDI_INFO:
+ case SD_OP_MAKE_FS:
+ case SD_OP_SHUTDOWN:
+ /* request is forwarded to cpg group */
+ return;
+ }
+
+ if (is_io_request(hdr->opcode)) {
+ struct cpg_event *cevent = &req->cev;
+
+ list_del(&req->r_wlist);
+
+ sys->nr_outstanding_io--;
+ /*
+ * TODO: if the request failed due to epoch unmatch,
+ * we should retry here (adds this request to the tail
+ * of sys->cpg_event_siblings.
+ */
+
+ if (!(req->rq.flags & SD_FLAG_CMD_DIRECT) &&
+ (req->rp.result == SD_RES_OLD_NODE_VER ||
+ req->rp.result == SD_RES_NEW_NODE_VER)) {
+
+
+ req->rq.epoch = sys->epoch;
+ req->nr_nodes = setup_ordered_sd_node_list(req);
+ setup_access_to_local_objects(req);
+
+ list_add_tail(&cevent->cpg_event_list, &sys->cpg_event_siblings);
+ again = 1;
+ }
+
+ resume_pending_requests();
+ resume_recovery_work();
+ }
+
+ if (!again)
+ req->done(req);
+}
+
static void queue_request(struct request *req)
{
struct cpg_event *cevent = &req->cev;
--
1.6.5