[sheepdog] [PATCH 4/4] sheep: fix eventfd_read() error handling

Liu Yuan namei.unix at gmail.com
Sat Sep 1 17:26:18 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

This is motivated by a crash log:
...
Sep 01 21:53:50 [block] suspend(64) going to resume
Sep 01 21:53:50 [block] exec_local_req(449) event fd read error Interrupted system call
Sep 01 21:53:50 [block] do_process_work(1245) failed: 12, 0 , 1, 8
Sep 01 21:53:50 [main] cluster_op_done(283) LOCK_VDI (0xc7a790)
Sep 01 21:53:50 [gway 4] do_process_work(1238) 83, c7c850, 32580
...

It shows that eventfd_read() got EINTR without proper handling, which then
messed up the subsequent processing.

- retry the read in exec_local_req() (worker thread)
- return early in enable/disable_tracer() (main thread)

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/request.c     | 10 +++++++---
 sheep/trace/trace.c | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/sheep/request.c b/sheep/request.c
index bcf705a..3f9b870 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -426,7 +426,8 @@ static struct request *alloc_local_request(void *data, int data_length)
 /*
  * Exec the request locally and synchronously.
  *
- * This function takes advantage of gateway's retry mechanism.
+ * This function takes advantage of gateway's retry mechanism and can only be
+ * called from a worker thread.
  */
 int exec_local_req(struct sd_req *rq, void *data)
 {
@@ -444,9 +445,13 @@ int exec_local_req(struct sd_req *rq, void *data)
 
 	eventfd_write(sys->req_efd, value);
 
+again:
+	/* In error case (for e.g, EINTR) just retry read */
 	ret = eventfd_read(req->wait_efd, &value);
-	if (ret < 0)
+	if (ret < 0) {
 		eprintf("%m\n");
+		goto again;
+	}
 
 	close(req->wait_efd);
 	ret = req->rp.result;
@@ -858,7 +863,6 @@ int create_listen_port(int port, void *data)
 	return create_listen_ports(port, create_listen_port_fn, data);
 }
 
-
 static void req_handler(int listen_fd, int events, void *data)
 {
 	eventfd_t value;
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
index 4b59b09..f27ea39 100644
--- a/sheep/trace/trace.c
+++ b/sheep/trace/trace.c
@@ -242,8 +242,14 @@ static notrace void enable_tracer(int fd, int events, void *data)
 	int ret;
 
 	ret = eventfd_read(trace_efd, &value);
-	if (ret < 0)
-		eprintf("%m");
+	/*
+	 * On error we can't retry the read in the main thread; simply return and
+	 * expect to be woken up by epoll again.
+	 */
+	if (ret < 0) {
+		eprintf("%m\n");
+		return;
+	}
 
 	if (short_thread_running())
 		return;
@@ -262,8 +268,10 @@ static notrace void disable_tracer(int fd, int events, void *data)
 	int ret;
 
 	ret = eventfd_read(fd, &value);
-	if (ret < 0)
-		eprintf("%m");
+	if (ret < 0) {
+		eprintf("%m\n");
+		return;
+	}
 
 	if (short_thread_running())
 		return;
-- 
1.7.12.84.gefa6462




More information about the sheepdog mailing list