[sheepdog] [PATCH v2] sheep: change one-shot timeout to keepalive

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Fri Jun 8 18:06:37 CEST 2012


At Fri,  8 Jun 2012 13:54:19 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly at taobao.com>
> 
> The 5s timeout really causes trouble: from our observation, we see a lot of
> timeout failures when the cluster is doing heavy IO.
> 
> Using keepalive means we don't fail with a timeout until the node at the
> other end is really down.
> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  include/net.h   |    2 --
>  lib/net.c       |   23 -----------------------
>  sheep/gateway.c |    7 +------
>  sheep/sdnet.c   |   35 +++++++++++++++++++++++++++++++++--
>  4 files changed, 34 insertions(+), 33 deletions(-)

Applied, thanks!

After the next release, let's reconsider how to interrupt
send/recv/poll when node membership changes.

Kazutaka

> 
> diff --git a/include/net.h b/include/net.h
> index d97984e..83da12a 100644
> --- a/include/net.h
> +++ b/include/net.h
> @@ -6,8 +6,6 @@
>  
>  #include "sheepdog_proto.h"
>  
> -#define DEFAULT_SOCKET_TIMEOUT 5 /* seconds */
> -
>  enum conn_state {
>  	C_IO_HEADER = 0,
>  	C_IO_DATA_INIT,
> diff --git a/lib/net.c b/lib/net.c
> index bebc108..c4a96ac 100644
> --- a/lib/net.c
> +++ b/lib/net.c
> @@ -420,29 +420,6 @@ int set_nodelay(int fd)
>  	return ret;
>  }
>  
> -int set_timeout(int fd)
> -{
> -	int ret;
> -	const struct timeval tv = {
> -		.tv_sec = DEFAULT_SOCKET_TIMEOUT,
> -		.tv_usec = 0,
> -	};
> -
> -	ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
> -	if (ret) {
> -		eprintf("failed to set send timeout\n");
> -		return ret;
> -	}
> -
> -	ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
> -	if (ret) {
> -		eprintf("failed to set recv timeout\n");
> -		return ret;
> -	}
> -
> -	return 0;
> -}
> -
>  int get_local_addr(uint8_t *bytes)
>  {
>  	struct ifaddrs *ifaddr, *ifa;
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index debe569..e92f3ed 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -157,18 +157,13 @@ int forward_write_obj_req(struct request *req)
>  
>  	ret = SD_RES_SUCCESS;
>  again:
> -	pollret = poll(pfds, nr_fds, DEFAULT_SOCKET_TIMEOUT * 1000);
> +	pollret = poll(pfds, nr_fds, -1);
>  	if (pollret < 0) {
>  		if (errno == EINTR)
>  			goto again;
>  
>  		ret = SD_RES_NETWORK_ERROR;
>  		goto err;
> -	} else if (pollret == 0) {
> -		/* poll time out */
> -		eprintf("timeout\n");
> -		ret = SD_RES_NETWORK_ERROR;
> -		goto err;
>  	}
>  
>  	for (i = 0; i < nr_fds; i++) {
> diff --git a/sheep/sdnet.c b/sheep/sdnet.c
> index 6323ee3..8479a74 100644
> --- a/sheep/sdnet.c
> +++ b/sheep/sdnet.c
> @@ -828,6 +828,38 @@ void del_sheep_fd(int fd)
>  	}
>  }
>  
> +/*
> + * Keepalive probing starts once the connection has been idle for 5s.
> + *
> + * Heart-beat messages are then sent periodically at a 1s interval.
> + * If the node at the other end of fd fails, we'll detect it after 3 lost probes (about 3s).
> + */
> +static int set_keepalive(int fd)
> +{
> +	int val = 1;
> +
> +	if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) < 0) {
> +		dprintf("%m\n");
> +		return -1;
> +	}
> +	val = 5;
> +	if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
> +		dprintf("%m\n");
> +		return -1;
> +	}
> +	val = 1;
> +	if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
> +		dprintf("%m\n");
> +		return -1;
> +	}
> +	val = 3;
> +	if (setsockopt(fd, SOL_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
> +		dprintf("%m\n");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
>  int get_sheep_fd(uint8_t *addr, uint16_t port, int node_idx, uint32_t epoch)
>  {
>  	int i, fd, ret;
> @@ -870,9 +902,8 @@ int get_sheep_fd(uint8_t *addr, uint16_t port, int node_idx, uint32_t epoch)
>  	if (fd < 0)
>  		return -1;
>  
> -	ret = set_timeout(fd);
> +	ret = set_keepalive(fd);
>  	if (ret) {
> -		eprintf("%m\n");
>  		close(fd);
>  		return -1;
>  	}
> -- 
> 1.7.10.2
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list