From: levin li <xingke.lwp at taobao.com> When a gateway request gets SD_RES_OLD_NODE_VER in io_op_done(), it means the gateway's epoch is older than the peer's, and the peer has set the response's epoch to its own system epoch. The gateway should then check whether the result is SD_RES_OLD_NODE_VER and whether its system epoch has reached the peer's system epoch, which is stored in req->rp.epoch. If it has not, the request should be pushed into wait_epoch_queue to wait until the system epoch changes; if the system epoch has already reached the peer's system epoch, the request is retried at once. Signed-off-by: levin li <xingke.lwp at taobao.com> --- sheep/sdnet.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/sheep/sdnet.c b/sheep/sdnet.c index a8483cb..c94df89 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -105,8 +105,14 @@ static void io_op_done(struct work *work) case SD_RES_NETWORK_ERROR: case SD_RES_WAIT_FOR_JOIN: case SD_RES_WAIT_FOR_FORMAT: - if (!(req->rq.flags & SD_FLAG_CMD_IO_LOCAL)) - goto retry; + if (!(req->rq.flags & SD_FLAG_CMD_IO_LOCAL)) { + if (req->rp.epoch > sys->epoch && + req->rp.result == SD_RES_OLD_NODE_VER) { + list_add_tail(&req->request_list, + &sys->wait_rw_queue); + } else + goto retry; + } break; case SD_RES_EIO: if (is_access_local(req, hdr->obj.oid)) { @@ -252,9 +258,21 @@ void resume_wait_epoch_requests(void) list_for_each_entry_safe(req, t, &sys->wait_rw_queue, request_list) { - - list_del(&req->request_list); - list_add_tail(&req->request_list, &sys->request_queue); + switch (req->rp.result) { + /* gateway retries to send the request when its epoch changes. */ + case SD_RES_OLD_NODE_VER: + req->rq.epoch = sys->epoch; + put_vnode_info(req->vnodes); + req->vnodes = get_vnode_info(); + setup_access_to_local_objects(req); + /* peer retries the request locally when its epoch changes. 
*/ + case SD_RES_NEW_NODE_VER: + list_del(&req->request_list); + list_add_tail(&req->request_list, &sys->request_queue); + break; + default: + break; + } } process_request_event_queues(); } -- 1.7.10 |