[sheepdog] [PATCH 1/2] sheep: fix forward_write_obj_req()

Liu Yuan namei.unix at gmail.com
Thu Jun 7 11:29:35 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

This patch addresses one very subtle problem, to quote from Kazutaka:

	One possibility is that if forward_write_obj_req() fails before
	receiving data, the next forward_(read|write)_obj_req() could be
	interleaved.

The interleaved requests will return a random res->result and, sometimes more
catastrophically, return EIO to the upper layer and ask the node to leave the cluster.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/gateway.c |   44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/sheep/gateway.c b/sheep/gateway.c
index c2ec901..debe569 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -122,9 +122,10 @@ int forward_write_obj_req(struct request *req)
 
 		fd = get_sheep_fd(v->addr, v->port, v->node_idx, fwd_hdr.epoch);
 		if (fd < 0) {
-			eprintf("failed to connect to %s:%"PRIu32"\n", name, v->port);
+			eprintf("failed to connect to %s:%"PRIu32"\n", name,
+				v->port);
 			ret = SD_RES_NETWORK_ERROR;
-			goto out;
+			goto err;
 		}
 
 		ret = send_req(fd, &fwd_hdr, req->data, &wlen);
@@ -132,7 +133,7 @@ int forward_write_obj_req(struct request *req)
 			del_sheep_fd(fd);
 			ret = SD_RES_NETWORK_ERROR;
 			dprintf("fail %"PRIu32"\n", ret);
-			goto out;
+			goto err;
 		}
 
 		pfds[nr_fds].fd = fd;
@@ -144,16 +145,14 @@ int forward_write_obj_req(struct request *req)
 		ret = do_local_io(req, fwd_hdr.epoch);
 		rsp->result = ret;
 
-		if (nr_fds == 0) {
-			eprintf("exit %"PRIu32"\n", ret);
-			goto out;
-		}
-
 		if (rsp->result != SD_RES_SUCCESS) {
-			eprintf("fail %"PRIu32"\n", ret);
+			eprintf("fail to write local %"PRIu32"\n", ret);
 			ret = rsp->result;
-			goto out;
+			goto err;
 		}
+
+		if (nr_fds == 0)
+			goto out;
 	}
 
 	ret = SD_RES_SUCCESS;
@@ -163,22 +162,18 @@ again:
 		if (errno == EINTR)
 			goto again;
 
-		ret = SD_RES_EIO;
-	} else if (pollret == 0) { /* poll time out */
+		ret = SD_RES_NETWORK_ERROR;
+		goto err;
+	} else if (pollret == 0) {
+		/* poll time out */
 		eprintf("timeout\n");
-
-		for (i = 0; i < nr_fds; i++)
-			del_sheep_fd(pfds[i].fd);
-
 		ret = SD_RES_NETWORK_ERROR;
-		goto out;
+		goto err;
 	}
 
 	for (i = 0; i < nr_fds; i++) {
-		if (pfds[i].fd < 0)
-			break;
-
-		if (pfds[i].revents & POLLERR || pfds[i].revents & POLLHUP || pfds[i].revents & POLLNVAL) {
+		if (pfds[i].revents & POLLERR || pfds[i].revents & POLLHUP ||
+		    pfds[i].revents & POLLNVAL) {
 			del_sheep_fd(pfds[i].fd);
 			ret = SD_RES_NETWORK_ERROR;
 			break;
@@ -208,11 +203,14 @@ again:
 
 	dprintf("%"PRIx64" %"PRIu32"\n", oid, nr_fds);
 
-	if (nr_fds > 0) {
+	if (nr_fds > 0)
 		goto again;
-	}
 out:
 	return ret;
+err:
+	for (i = 0; i < nr_fds; i++)
+		del_sheep_fd(pfds[i].fd);
+	return ret;
 }
 
 static int fix_object_consistency(struct request *req)
-- 
1.7.10.2




More information about the sheepdog mailing list