[sheepdog] [PATCH 3/3] gateway: fix poll() hang

Liu Yuan namei.unix at gmail.com
Tue Sep 4 11:00:58 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

poll() might wait for unbounded period of time,  when 
    a) header in send buffer is not acked
    b) the remote node is crashed 

So if we detect that the send buffer is not empty, we switch to timeout poll.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>

---
 sheep/gateway.c |   42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/sheep/gateway.c b/sheep/gateway.c
index 41d712b..2fdaa58 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <poll.h>
+#include <sys/ioctl.h>
 
 #include "sheep_priv.h"
 
@@ -134,12 +135,32 @@ struct pfd_info {
 	int nr;
 };
 
-static inline void pfd_info_init(struct write_info *wi, struct pfd_info *pi)
+static inline int pfd_info_init(struct write_info *wi, struct pfd_info *pi)
 {
-	int i;
-	for (i = 0; i < wi->nr_sent; i++)
+	int i, buf_empty = 1, bytes;
+	for (i = 0; i < wi->nr_sent; i++) {
 		pi->pfds[i] = wi->ent[i].pfd;
+		if (buf_empty) {
+			/*
+			 * Check if the sockfd send buffer is empty.
+			 *
+			 * For a non-empty buffer (not acked yet), there is
+			 * possibilty that the remote node is down, in which
+			 * case we use a timeout poll to avoid poll infinitely
+			 */
+			if (ioctl(pi->pfds[i].fd, TIOCOUTQ, &bytes) < 0) {
+				eprintf("%m\n");
+				continue;
+			}
+			if (bytes != 0) {
+				buf_empty = 0;
+				dprintf("send buffer is not empty\n");
+			}
+		}
+	}
+
 	pi->nr = wi->nr_sent;
+	return buf_empty;
 }
 
 /*
@@ -152,16 +173,25 @@ static inline void pfd_info_init(struct write_info *wi, struct pfd_info *pi)
  */
 static int wait_forward_request(struct write_info *wi, struct sd_rsp *rsp)
 {
-	int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i;
+	int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i, buf_empty;
 	struct pfd_info pi;;
 again:
-	pfd_info_init(wi, &pi);
-	pollret = poll(pi.pfds, pi.nr, -1);
+	buf_empty= pfd_info_init(wi, &pi);
+	pollret = poll(pi.pfds, pi.nr, buf_empty ? -1 : 5000);
 	if (pollret < 0) {
 		if (errno == EINTR)
 			goto again;
 
 		panic("%m\n");
+	} else if (pollret == 0) {
+		eprintf("poll timeout\n");
+		nr_sent = wi->nr_sent;
+		/* XXX Blinedly close all the connections */
+		for (i = 0; i < nr_sent; i++)
+			finish_one_write_err(wi, i);
+
+		err_ret = SD_RES_NETWORK_ERROR;
+		goto finish_write;
 	}
 
 	nr_sent = wi->nr_sent;
-- 
1.7.10.2




More information about the sheepdog mailing list