[sheepdog] [PATCH v2 1/7] sheep: make requests with new epoch sleep until epoch is updated
levin li
levin108 at gmail.com
Wed May 23 09:02:23 CEST 2012
From: levin li <xingke.lwp at taobao.com>
If a request comes with an epoch newer than the system epoch, we
shouldn't just complete it with result SD_RES_NEW_NODE_VER;
if we did, the sender would busy-retry the request, which may
cause the CPU to be too busy to process other requests.
We push requests with a newer epoch onto a wait queue (sys->wait_rw_queue)
to make them wait for epoch consistency; after the epoch changes we wake
up the requests in the queue, which avoids busy retrying.
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
sheep/group.c | 1 +
sheep/recovery.c | 2 ++
sheep/sdnet.c | 30 ++++++++++++++++++++++++++----
sheep/sheep_priv.h | 2 ++
4 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/sheep/group.c b/sheep/group.c
index 7f3bedb..9052cf9 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -1378,6 +1378,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes)
INIT_LIST_HEAD(&sys->request_queue);
INIT_LIST_HEAD(&sys->event_queue);
+ INIT_LIST_HEAD(&sys->wait_rw_queue);
ret = send_join_request(&sys->this_node);
if (ret != 0)
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 55bb122..b7c0438 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -832,5 +832,7 @@ int start_recovery(uint32_t epoch)
queue_work(sys->recovery_wqueue, &rw->work);
}
+ resume_wait_epoch_requests();
+
return 0;
}
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index a13b3e3..c1058fb 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -174,14 +174,23 @@ static int check_epoch(struct request *req)
int ret = SD_RES_SUCCESS;
if (before(req_epoch, sys->epoch)) {
- ret = SD_RES_OLD_NODE_VER;
eprintf("old node version %u, %u, %x\n",
sys->epoch, req_epoch, opcode);
+ /* make gateway to retry. */
+ req->rp.result = SD_RES_OLD_NODE_VER;
+ req->rp.epoch = sys->epoch;
+ req->work.done(&req->work);
+ ret = req->rp.result;
} else if (after(req_epoch, sys->epoch)) {
- ret = SD_RES_NEW_NODE_VER;
eprintf("new node version %u, %u, %x\n",
sys->epoch, req_epoch, opcode);
+
+ /* wait for epoch consistency. */
+ req->rp.result = SD_RES_NEW_NODE_VER;
+ list_add_tail(&req->request_list, &sys->wait_rw_queue);
+ ret = req->rp.result;
}
+
return ret;
}
@@ -201,9 +210,7 @@ static int check_request(struct request *req)
else {
int ret = check_epoch(req);
if (ret != SD_RES_SUCCESS) {
- req->rp.result = ret;
sys->nr_outstanding_io++;
- req->work.done(&req->work);
return -1;
}
}
@@ -251,6 +258,21 @@ void resume_pending_requests(void)
process_request_event_queues();
}
+void resume_wait_epoch_requests(void)
+{
+ struct request *req, *t;
+
+ /* Requests on sys->wait_rw_queue were parked by check_epoch() with
+ * result SD_RES_NEW_NODE_VER; move each one back to
+ * sys->request_queue and run the queue processing.
+ * NOTE(review): if processing can park a request on wait_rw_queue
+ * again, this loop may revisit it -- confirm it terminates. */
+ list_for_each_entry_safe(req, t, &sys->wait_rw_queue,
+ request_list) {
+
+ list_del(&req->request_list);
+ list_add_tail(&req->request_list, &sys->request_queue);
+ process_request_event_queues();
+ }
+}
+
static void queue_request(struct request *req)
{
struct sd_req *hdr = &req->rq;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 21ee282..d2e1379 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -136,6 +136,7 @@ struct cluster_info {
struct list_head request_queue;
struct list_head event_queue;
+ struct list_head wait_rw_queue;
struct event_struct *cur_cevent;
int nr_outstanding_io;
int nr_outstanding_reqs;
@@ -263,6 +264,7 @@ int get_nr_copies(struct vnode_info *vnode_info);
int is_access_to_busy_objects(uint64_t oid);
void resume_pending_requests(void);
+void resume_wait_epoch_requests(void);
int create_cluster(int port, int64_t zone, int nr_vnodes);
int leave_cluster(void);
--
1.7.10
More information about the sheepdog
mailing list