From: Yunkai Zhang <qiushu.zyk at taobao.com> Actually, there are two race problems when we call do_cluster_request() in IO threads: 1) a race on sys->pending_list, which is also updated in sd_notify_handler(). 2) calling sys->notify() from IO threads rather than the main thread is also a mistake. So I move do_cluster_request() into cluster_op_done(). Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com> --- sheep/sdnet.c | 5 ++++- 1 files changed, 4 insertions(+), 1 deletions(-) diff --git a/sheep/sdnet.c b/sheep/sdnet.c index 8aad8f9..10be245 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -174,6 +174,7 @@ static void local_op_done(struct work *work) static void cluster_op_done(struct work *work) { /* request is forwarded to cpg group */ + do_cluster_request(work); } static void do_local_request(struct work *work) @@ -255,6 +256,8 @@ static int check_request(struct request *req) return 0; } +static do_nothing(struct work *work) {} + static void queue_request(struct request *req) { struct event_struct *cevent = &req->cev; @@ -323,7 +326,7 @@ static void queue_request(struct request *req) req->work.fn = do_local_request; req->work.done = local_op_done; } else if (is_cluster_op(req->op)) { - req->work.fn = do_cluster_request; + req->work.fn = do_nothing; req->work.done = cluster_op_done; } else { eprintf("unknown operation %d\n", hdr->opcode); -- 1.7.7.6 |