[sheepdog] [PATCH v4 4/8] recovery: make IO request to wait when recovery is in RW_INIT

Fri May 25 04:30:56 CEST 2012

From: levin li <xingke.lwp at taobao.com>

When recovery is in the RW_INIT state, an IO request is marked as
recovering and the sender busy-retries. Instead of just returning
SD_RES_NEW_NODE_VER, we should make the request wait until recovery
enters the RW_RUN state, at which point we can determine whether the
requested object is actually in recovery.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 include/sheepdog_proto.h |    1 +
 sheep/recovery.c         |   34 +++++++++++++++++++++++++++++-----
 sheep/sdnet.c            |   12 +++++++++---
 sheep/sheep_priv.h       |    1 +
 4 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 0cd8d9d..e20254e 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -65,6 +65,7 @@
 #define SD_RES_NO_STORE         0x20 /* No targeted backend store */
 #define SD_RES_NO_SUPPORT       0x21 /* Operation is not supported by backend store */
 #define SD_RES_CLUSTER_RECOVERING 0x22 /* Cluster is recovering. */
+#define SD_RES_OBJ_RECOVERING     0x23 /* Object is recovering */
 
 /*
  * Object ID rules
diff --git a/sheep/recovery.c b/sheep/recovery.c
index de4bc62..72c74c7 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -329,6 +329,13 @@ int node_in_recovery(void)
 	return !!recovering_work;
 }
 
+/* Non-zero only while a recovery exists (recovering_work != NULL) and it
+ * is still in RW_INIT; the NULL check avoids a crash when none is active. */
+int is_recovery_init(void)
+{
+	return recovering_work && recovering_work->state == RW_INIT;
+}
+
 int is_recoverying_oid(uint64_t oid)
 {
 	uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
@@ -347,14 +354,14 @@ int is_recoverying_oid(uint64_t oid)
 	if (before(rw->epoch, sys->epoch))
 		return 1;
 
-	if (rw->state == RW_INIT)
-		return 1;
-
 	if (sd_store->exist(oid)) {
 		dprintf("the object %" PRIx64 " is already recoverd\n", oid);
 		return 0;
 	}
 
+	if (rw->state == RW_INIT)
+		return 1;
+
 	/* the first 'rw->nr_blocking' objects were already scheduled to be done earlier */
 	for (i = 0; i < rw->nr_blocking; i++)
 		if (rw->oids[rw->done + i] == oid)
@@ -384,14 +391,31 @@ int is_recoverying_oid(uint64_t oid)
 	return 0;
 }
 
+/* Requeue waiters marked SD_RES_OBJ_RECOVERING, then rerun the queues. */
+static void resume_wait_recovery_requests(void)
+{
+	struct request *req, *next;
+
+	list_for_each_entry_safe(req, next, &sys->wait_rw_queue, request_list) {
+		dprintf("resume wait oid %" PRIx64 "\n", req->local_oid);
+		if (req->rp.result != SD_RES_OBJ_RECOVERING)
+			continue;
+		list_del(&req->request_list);
+		list_add_tail(&req->request_list, &sys->request_queue);
+	}
+
+	process_request_event_queues();
+}
+
 static void do_recover_main(struct work *work)
 {
 	struct recovery_work *rw = container_of(work, struct recovery_work, work);
 	uint64_t oid;
 
-	if (rw->state == RW_INIT)
+	if (rw->state == RW_INIT) {
 		rw->state = RW_RUN;
-	else if (!rw->retry) {
+		resume_wait_recovery_requests();
+	} else if (!rw->retry) {
 		rw->done++;
 		if (rw->nr_blocking > 0)
 			rw->nr_blocking--;
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index c94df89..0e127d1 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -215,9 +215,15 @@ static int check_request(struct request *req)
 	if (is_recoverying_oid(req->local_oid)) {
 		if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL) {
 			/* Sheep peer request */
-			req->rp.result = SD_RES_NEW_NODE_VER;
-			sys->nr_outstanding_io++;
-			req->work.done(&req->work);
+			if (is_recovery_init()) {
+				req->rp.result = SD_RES_OBJ_RECOVERING;
+				list_add_tail(&req->request_list,
+						&sys->wait_rw_queue);
+			} else {
+				req->rp.result = SD_RES_NEW_NODE_VER;
+				sys->nr_outstanding_io++;
+				req->work.done(&req->work);
+			}
 		} else {
 			/* Gateway request */
 			list_add_tail(&req->request_list, &sys->req_wait_for_obj_list);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ba29a2a..b9ae438 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -305,6 +305,7 @@ int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *);
 int start_recovery(uint32_t epoch);
 void resume_recovery_work(void);
 int is_recoverying_oid(uint64_t oid);
+int is_recovery_init(void);
 int node_in_recovery(void);
 
 int write_object(struct vnode_info *vnodes, uint32_t node_version,
-- 
1.7.10