[sheepdog] [PATCH 2/3] net: add read timeout for sockfd
Liu Yuan
namei.unix at gmail.com
Thu Jan 17 16:30:29 CET 2013
From: Liu Yuan <tailai.ly at taobao.com>
This fixes the hang problem demonstrated by the refined 035 on the sheep side.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
include/net.h | 11 ++++++++++-
lib/net.c | 24 ++++++++++++++++++++++--
sheep/gateway.c | 8 --------
3 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/include/net.h b/include/net.h
index f795707..787ee79 100644
--- a/include/net.h
+++ b/include/net.h
@@ -6,6 +6,15 @@
#include "sheepdog_proto.h"
+/*
+ * We observed that for a busy node, the response could take as long as 15s, so
+ * waiting 30s would be a safe value. Even if we time out falsely, the gateway will
+ * retry the request and the sockfd cache module will repair the false closes.
+ */
+#define MAX_POLLTIME 30 /* seconds */
+#define POLL_TIMEOUT 5 /* seconds */
+#define MAX_RETRY_COUNT (MAX_POLLTIME / POLL_TIMEOUT)
+
enum conn_state {
C_IO_HEADER = 0,
C_IO_DATA_INIT,
@@ -54,7 +63,7 @@ int set_nonblocking(int fd);
int set_nodelay(int fd);
int set_keepalive(int fd);
int set_snd_timeout(int fd);
-int set_timeout(int fd);
+int set_rcv_timeout(int fd);
int get_local_addr(uint8_t *bytes);
bool inetaddr_is_valid(char *addr);
diff --git a/lib/net.c b/lib/net.c
index e6d8a56..365c1e1 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -238,6 +238,13 @@ int connect_to(const char *name, int port)
break;
}
+ ret = set_rcv_timeout(fd);
+ if (ret) {
+ eprintf("failed to set recv timeout: %m\n");
+ close(fd);
+ break;
+ }
+
ret = connect(fd, res->ai_addr, res->ai_addrlen);
if (ret) {
eprintf("failed to connect to %s:%d: %m\n",
@@ -436,18 +443,31 @@ int set_nonblocking(int fd)
return ret;
}
-/* Send timeout for 5 second */
int set_snd_timeout(int fd)
{
struct timeval timeout;
- timeout.tv_sec = 5;
+ timeout.tv_sec = POLL_TIMEOUT;
timeout.tv_usec = 0;
return setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout,
sizeof(timeout));
}
+int set_rcv_timeout(int fd)
+{
+ struct timeval timeout;
+/*
+ * We should wait longer for read than write because the target node might be
+ * busy doing IO
+ */
+ timeout.tv_sec = MAX_POLLTIME;
+ timeout.tv_usec = 0;
+
+ return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout));
+}
+
int set_nodelay(int fd)
{
int ret, opt;
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 44f9ee2..33fad42 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -149,14 +149,6 @@ static inline void pfd_info_init(struct write_info *wi, struct pfd_info *pi)
*/
static int wait_forward_request(struct write_info *wi, struct request *req)
{
-/*
- * We observed that for a busy node, the response could be as long as 15s, so
- * wait 30s would be a safe value. Even we are false timeouted, the gateway will
- * retry the request and sockfd cache module will repair the false-closes.
- */
-#define MAX_POLLTIME 30 /* seconds */
-#define POLL_TIMEOUT 5 /* seconds */
-#define MAX_RETRY_COUNT (MAX_POLLTIME / POLL_TIMEOUT)
int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i,
repeat = MAX_RETRY_COUNT;
struct pfd_info pi;
--
1.7.9.5
More information about the sheepdog
mailing list