[Sheepdog] [PATCH] sheep: set send/recv timeout
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Mon Dec 12 12:14:09 CET 2011
send()/recv() could sleep for a long time if a network failure happens during
network I/O, and this prevents the epoch number from being incremented because
we assume that there are no outstanding I/O requests while updating
node membership info. This patch fixes the problem.
It is not a problem to set a small value for the timeout because I/Os are
retried automatically even if a send/recv timeout has occurred.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
Hi Yibin,
I think this patch is much simpler than using timeout in poll().
What do you think?
I also tried TCP keepalive, but it doesn't work in my environment for
some reason.
Thanks,
Kazutaka
include/net.h | 3 +++
lib/net.c | 27 +++++++++++++++++++++++++--
sheep/sdnet.c | 7 +++++++
3 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/include/net.h b/include/net.h
index 9e51fea..2d087e2 100644
--- a/include/net.h
+++ b/include/net.h
@@ -3,6 +3,8 @@
#include <sys/socket.h>
+#define DEFAULT_SOCKET_TIMEOUT 5 /* seconds */
+
enum conn_state {
C_IO_HEADER = 0,
C_IO_DATA_INIT,
@@ -45,5 +47,6 @@ int create_listen_ports(int port, int (*callback)(int fd, void *), void *data);
char *addr_to_str(char *str, int size, uint8_t *addr, uint16_t port);
int set_nonblocking(int fd);
int set_nodelay(int fd);
+int set_timeout(int fd);
#endif
diff --git a/lib/net.c b/lib/net.c
index d4a5d9b..3caba0f 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -242,7 +242,7 @@ int do_read(int sockfd, void *buf, int len)
reread:
ret = read(sockfd, buf, len);
if (ret < 0 || !ret) {
- if (errno == EINTR || errno == EAGAIN)
+ if (errno == EINTR)
goto reread;
fprintf(stderr, "failed to read from socket: %m\n");
return 1;
@@ -275,7 +275,7 @@ static int do_write(int sockfd, struct msghdr *msg, int len)
rewrite:
ret = sendmsg(sockfd, msg, 0);
if (ret < 0) {
- if (errno == EINTR || errno == EAGAIN)
+ if (errno == EINTR)
goto rewrite;
fprintf(stderr, "failed to write to socket: %m\n");
return 1;
@@ -397,3 +397,26 @@ int set_nodelay(int fd)
ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt));
return ret;
}
+
+int set_timeout(int fd)
+{
+ int ret;
+ const struct timeval tv = {
+ .tv_sec = DEFAULT_SOCKET_TIMEOUT,
+ .tv_usec = 0,
+ };
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
+ if (ret) {
+ eprintf("failed to set send timeout\n");
+ return ret;
+ }
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ if (ret) {
+ eprintf("failed to set recv timeout\n");
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index 8455653..510fd4e 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -841,6 +841,13 @@ int get_sheep_fd(uint8_t *addr, uint16_t port, int node_idx,
if (fd < 0)
return -1;
+ ret = set_timeout(fd);
+ if (ret) {
+ eprintf("%m\n");
+ close(fd);
+ return -1;
+ }
+
ret = set_nodelay(fd);
if (ret) {
eprintf("%m\n");
--
1.7.2.5
More information about the sheepdog
mailing list