[sheepdog] [PATCH 1/2] sheep: fix forward_write_obj_req()
Liu Yuan
namei.unix at gmail.com
Thu Jun 7 11:29:35 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
This patch addresses one very subtle problem, to quote from Kazutaka:
One possibility is that if forward_write_obj_req() fails before
receiving data, the next forward_(read|write)_obj_req() could be
interleaved.
The interleaved requests will return a random res->result and, more
catastrophically, sometimes return EIO to the upper layer and ask the node to leave the cluster.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/gateway.c | 44 +++++++++++++++++++++-----------------------
1 file changed, 21 insertions(+), 23 deletions(-)
diff --git a/sheep/gateway.c b/sheep/gateway.c
index c2ec901..debe569 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -122,9 +122,10 @@ int forward_write_obj_req(struct request *req)
fd = get_sheep_fd(v->addr, v->port, v->node_idx, fwd_hdr.epoch);
if (fd < 0) {
- eprintf("failed to connect to %s:%"PRIu32"\n", name, v->port);
+ eprintf("failed to connect to %s:%"PRIu32"\n", name,
+ v->port);
ret = SD_RES_NETWORK_ERROR;
- goto out;
+ goto err;
}
ret = send_req(fd, &fwd_hdr, req->data, &wlen);
@@ -132,7 +133,7 @@ int forward_write_obj_req(struct request *req)
del_sheep_fd(fd);
ret = SD_RES_NETWORK_ERROR;
dprintf("fail %"PRIu32"\n", ret);
- goto out;
+ goto err;
}
pfds[nr_fds].fd = fd;
@@ -144,16 +145,14 @@ int forward_write_obj_req(struct request *req)
ret = do_local_io(req, fwd_hdr.epoch);
rsp->result = ret;
- if (nr_fds == 0) {
- eprintf("exit %"PRIu32"\n", ret);
- goto out;
- }
-
if (rsp->result != SD_RES_SUCCESS) {
- eprintf("fail %"PRIu32"\n", ret);
+ eprintf("fail to write local %"PRIu32"\n", ret);
ret = rsp->result;
- goto out;
+ goto err;
}
+
+ if (nr_fds == 0)
+ goto out;
}
ret = SD_RES_SUCCESS;
@@ -163,22 +162,18 @@ again:
if (errno == EINTR)
goto again;
- ret = SD_RES_EIO;
- } else if (pollret == 0) { /* poll time out */
+ ret = SD_RES_NETWORK_ERROR;
+ goto err;
+ } else if (pollret == 0) {
+ /* poll time out */
eprintf("timeout\n");
-
- for (i = 0; i < nr_fds; i++)
- del_sheep_fd(pfds[i].fd);
-
ret = SD_RES_NETWORK_ERROR;
- goto out;
+ goto err;
}
for (i = 0; i < nr_fds; i++) {
- if (pfds[i].fd < 0)
- break;
-
- if (pfds[i].revents & POLLERR || pfds[i].revents & POLLHUP || pfds[i].revents & POLLNVAL) {
+ if (pfds[i].revents & POLLERR || pfds[i].revents & POLLHUP ||
+ pfds[i].revents & POLLNVAL) {
del_sheep_fd(pfds[i].fd);
ret = SD_RES_NETWORK_ERROR;
break;
@@ -208,11 +203,14 @@ again:
dprintf("%"PRIx64" %"PRIu32"\n", oid, nr_fds);
- if (nr_fds > 0) {
+ if (nr_fds > 0)
goto again;
- }
out:
return ret;
+err:
+ for (i = 0; i < nr_fds; i++)
+ del_sheep_fd(pfds[i].fd);
+ return ret;
}
static int fix_object_consistency(struct request *req)
--
1.7.10.2
More information about the sheepdog
mailing list