[sheepdog] [PATCH 2/2] sheep: remove timeout for socket pool

Liu Yuan namei.unix at gmail.com
Thu Jun 7 11:29:36 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

The connections in the cache pool are TCP connections, so we don't need to
time out on them. If a peer doesn't return data to us, it means it is really
busy preparing the response or doing other work, but it will send the data to
us eventually. If the node fails without sending back a response, poll will
return -1 for us.

From our observation, the 5s timeout really causes trouble: we see a lot of
timeout failures when the cluster is doing heavy IO.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 include/net.h   |    2 --
 lib/net.c       |   23 -----------------------
 sheep/gateway.c |    7 +------
 sheep/sdnet.c   |    7 -------
 4 files changed, 1 insertion(+), 38 deletions(-)

diff --git a/include/net.h b/include/net.h
index d97984e..83da12a 100644
--- a/include/net.h
+++ b/include/net.h
@@ -6,8 +6,6 @@
 
 #include "sheepdog_proto.h"
 
-#define DEFAULT_SOCKET_TIMEOUT 5 /* seconds */
-
 enum conn_state {
 	C_IO_HEADER = 0,
 	C_IO_DATA_INIT,
diff --git a/lib/net.c b/lib/net.c
index bebc108..c4a96ac 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -420,29 +420,6 @@ int set_nodelay(int fd)
 	return ret;
 }
 
-int set_timeout(int fd)
-{
-	int ret;
-	const struct timeval tv = {
-		.tv_sec = DEFAULT_SOCKET_TIMEOUT,
-		.tv_usec = 0,
-	};
-
-	ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
-	if (ret) {
-		eprintf("failed to set send timeout\n");
-		return ret;
-	}
-
-	ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
-	if (ret) {
-		eprintf("failed to set recv timeout\n");
-		return ret;
-	}
-
-	return 0;
-}
-
 int get_local_addr(uint8_t *bytes)
 {
 	struct ifaddrs *ifaddr, *ifa;
diff --git a/sheep/gateway.c b/sheep/gateway.c
index debe569..e92f3ed 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -157,18 +157,13 @@ int forward_write_obj_req(struct request *req)
 
 	ret = SD_RES_SUCCESS;
 again:
-	pollret = poll(pfds, nr_fds, DEFAULT_SOCKET_TIMEOUT * 1000);
+	pollret = poll(pfds, nr_fds, -1);
 	if (pollret < 0) {
 		if (errno == EINTR)
 			goto again;
 
 		ret = SD_RES_NETWORK_ERROR;
 		goto err;
-	} else if (pollret == 0) {
-		/* poll time out */
-		eprintf("timeout\n");
-		ret = SD_RES_NETWORK_ERROR;
-		goto err;
 	}
 
 	for (i = 0; i < nr_fds; i++) {
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index 6323ee3..bd09217 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -870,13 +870,6 @@ int get_sheep_fd(uint8_t *addr, uint16_t port, int node_idx, uint32_t epoch)
 	if (fd < 0)
 		return -1;
 
-	ret = set_timeout(fd);
-	if (ret) {
-		eprintf("%m\n");
-		close(fd);
-		return -1;
-	}
-
 	ret = set_nodelay(fd);
 	if (ret) {
 		eprintf("%m\n");
-- 
1.7.10.2




More information about the sheepdog mailing list