[sheepdog] Some thoughts based on my preliminary I/O test

Sun May 26 04:53:51 CEST 2013

At Thu, 23 May 2013 14:10:33 +0800,
Hongyi Wang wrote:
> 
> I understand this is not a failure. But the problem is big IO which
> causes "poll timeout" hugely degrades the performance. For my test,
> I tried to write a 10GB file (bs=4m), which is expected to complete
> in 30m even in 100M network bandwidth. But I cannot finish it in 2
> hours (actually I had to kill the task). That is a bit weird.
>  

After the 30-second timeout expires, sheep stops waiting for responses
from the target nodes and resends the write requests.  I suspect that
your sheep kept resending the write requests forever and never got any
responses because of the low network bandwidth.

To confirm this assumption, could you try the workaround below?

---- >8 ---- >8 ---- >8 ----

diff --git a/include/net.h b/include/net.h
index 75e6a76..7ce90a1 100644
--- a/include/net.h
+++ b/include/net.h
@@ -15,7 +15,6 @@
  */
 #define MAX_POLLTIME 30 /* seconds */
 #define POLL_TIMEOUT 5 /* seconds */
-#define MAX_RETRY_COUNT (MAX_POLLTIME / POLL_TIMEOUT)
 
 enum conn_state {
 	C_IO_HEADER = 0,
diff --git a/lib/net.c b/lib/net.c
index d3e8e91..809769c 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -274,7 +274,7 @@ success:
 int do_read(int sockfd, void *buf, int len, bool (*need_retry)(uint32_t epoch),
 	    uint32_t epoch)
 {
-	int ret, repeat = MAX_RETRY_COUNT;
+	int ret;
 reread:
 	ret = read(sockfd, buf, len);
 	if (ret == 0) {
@@ -288,11 +288,9 @@ reread:
 		 * Since we set timeout for read, we'll get EAGAIN even for
 		 * blocking sockfd.
 		 */
-		if (errno == EAGAIN && repeat &&
-		    (need_retry == NULL || need_retry(epoch))) {
-			repeat--;
+		if (errno == EAGAIN &&
+		    (need_retry == NULL || need_retry(epoch)))
 			goto reread;
-		}
 
 		sd_eprintf("failed to read from socket: %d, %m", ret);
 		return 1;
@@ -322,7 +320,7 @@ static void forward_iov(struct msghdr *msg, int len)
 static int do_write(int sockfd, struct msghdr *msg, int len,
 		    bool (*need_retry)(uint32_t), uint32_t epoch)
 {
-	int ret, repeat = MAX_RETRY_COUNT;
+	int ret;
 rewrite:
 	ret = sendmsg(sockfd, msg, 0);
 	if (ret < 0) {
@@ -332,11 +330,9 @@ rewrite:
 		 * Since we set timeout for write, we'll get EAGAIN even for
 		 * blocking sockfd.
 		 */
-		if (errno == EAGAIN && repeat &&
-		    (need_retry == NULL || need_retry(epoch))) {
-			repeat--;
+		if (errno == EAGAIN &&
+		    (need_retry == NULL || need_retry(epoch)))
 			goto rewrite;
-		}
 
 		sd_eprintf("failed to write to socket: %m");
 		return 1;
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 8495380..e65935a 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -151,8 +151,7 @@ static inline void pfd_info_init(struct write_info *wi, struct pfd_info *pi)
  */
 static int wait_forward_request(struct write_info *wi, struct request *req)
 {
-	int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i,
-	    repeat = MAX_RETRY_COUNT;
+	int nr_sent, err_ret = SD_RES_SUCCESS, ret, pollret, i;
 	struct pfd_info pi;
 	struct sd_rsp *rsp = &req->rp;
 again:
@@ -165,11 +164,10 @@ again:
 		panic("%m");
 	} else if (pollret == 0) {
 		/*
-		 * If IO NIC is down, epoch isn't incremented, so we can't retry
-		 * for ever.
+		 * FIXME: If IO NIC is down, epoch isn't incremented, so we
+		 * can't retry for ever.
 		 */
-		if (sheep_need_retry(req->rq.epoch) && repeat) {
-			repeat--;
+		if (sheep_need_retry(req->rq.epoch)) {
 			sd_printf(SDOG_WARNING,
 				  "poll timeout %d, disks of some nodes or"
 				   " network is busy. Going to poll-wait again",