From: levin li <xingke.lwp at taobao.com> When recovery is in RW_INIT state, the IO request would be marked as recovering, and the sender would keep busy-retrying. We should make the request wait until recovery enters RW_RUN state to determine whether the requested object is in recovery, instead of just returning SD_RES_NEW_NODE_VER. Signed-off-by: levin li <xingke.lwp at taobao.com> --- sheep/group.c | 1 + sheep/recovery.c | 25 +++++++++++++++++++++++-- sheep/sdnet.c | 11 ++++++++--- sheep/sheep_priv.h | 2 ++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/sheep/group.c b/sheep/group.c index 3266c38..63742a2 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -1379,6 +1379,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes) INIT_LIST_HEAD(&sys->request_queue); INIT_LIST_HEAD(&sys->event_queue); INIT_LIST_HEAD(&sys->wait_epoch_queue); + INIT_LIST_HEAD(&sys->wait_rw_queue); ret = send_join_request(&sys->this_node); if (ret != 0) diff --git a/sheep/recovery.c b/sheep/recovery.c index 3b5caa3..0d05661 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -526,6 +526,13 @@ int node_in_recovery(void) return !!recovering_work; } +int is_recovery_init(void) +{ + struct recovery_work *rw = recovering_work; + + return rw->state == RW_INIT; +} + int is_recovering_oid(uint64_t oid) { struct recovery_work *rw = recovering_work; @@ -573,6 +580,19 @@ int is_recovering_oid(uint64_t oid) return 0; } +static void resume_wait_recovery_queue(void) +{ + struct request *req, *t; + + list_for_each_entry_safe(req, t, &sys->wait_rw_queue, + request_list) { + dprintf("resume wait oid %" PRIx64 "\n", req->local_oid); + list_del(&req->request_list); + list_add_tail(&req->request_list, &sys->request_queue); + process_request_event_queues(); + } +} + static void do_recover_main(struct work *work) { struct recovery_work *rw = container_of(work, struct recovery_work, work); @@ -582,9 +602,10 @@ static void do_recover_main(struct work *work) again: if (rw->prior_count == 0) { - 
if (rw->state == RW_INIT) + if (rw->state == RW_INIT) { rw->state = RW_RUN; - else if (!rw->retry) + resume_wait_recovery_queue(); + } else if (!rw->retry) rw->done++; } diff --git a/sheep/sdnet.c b/sheep/sdnet.c index e1334c8..565625e 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -227,9 +227,14 @@ static int check_request(struct request *req) if (is_recovering_oid(req->local_oid)) { if (req->rq.flags & SD_FLAG_CMD_IO_LOCAL) { /* Sheep peer request */ - req->rp.result = SD_RES_NEW_NODE_VER; - sys->nr_outstanding_io++; - req->work.done(&req->work); + if (is_recovery_init()) + list_add_tail(&req->request_list, + &sys->wait_rw_queue); + else + req->rp.result = SD_RES_NEW_NODE_VER; + sys->nr_outstanding_io++; + req->work.done(&req->work); + } } else { /* Gateway request */ list_add_tail(&req->request_list, &sys->req_wait_for_obj_list); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 00c299a..5e804c4 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -137,6 +137,7 @@ struct cluster_info { struct list_head request_queue; struct list_head event_queue; struct list_head wait_epoch_queue; + struct list_head wait_rw_queue; struct event_struct *cur_cevent; int nr_outstanding_io; int nr_outstanding_reqs; @@ -300,6 +301,7 @@ int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *); int start_recovery(uint32_t epoch); void resume_recovery_work(void); int is_recovering_oid(uint64_t oid); +int is_recovery_init(void); int node_in_recovery(void); int write_object(struct vnode_info *vnodes, uint32_t node_version, -- 1.7.10 |