[Sheepdog] [PATCH] sheep: block I/O requests under high memory pressure

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Wed Nov 2 08:03:03 CET 2011


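This fixes an OOM bug which occurs when there are many outstanding I/O
requests.  Sheep now tracks the total data size of outstanding requests;
while it exceeds MAX_OUTSTANDING_DATA_SIZE (256 MB), sheep stops reading
new requests from client connections, and it resumes reading once the
total drops below the limit.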

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 include/net.h      |    5 +++++
 lib/net.c          |   22 ++++++++++++++++++++--
 sheep/group.c      |    1 +
 sheep/sdnet.c      |   28 +++++++++++++++++++++++++---
 sheep/sheep_priv.h |    5 +++++
 5 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/include/net.h b/include/net.h
index 6a9a787..9e51fea 100644
--- a/include/net.h
+++ b/include/net.h
@@ -13,6 +13,7 @@ enum conn_state {
 
 struct connection {
 	int fd;
+	unsigned int events;
 
 	enum conn_state c_rx_state;
 	int rx_length;
@@ -23,10 +24,14 @@ struct connection {
 	int tx_length;
 	void *tx_buf;
 	struct sd_rsp tx_hdr;
+
+	struct list_head blocking_siblings;
 };
 
 int conn_tx_off(struct connection *conn);
 int conn_tx_on(struct connection *conn);
+int conn_rx_off(struct connection *conn);
+int conn_rx_on(struct connection *conn);
 int is_conn_dead(struct connection *conn);
 int do_read(int sockfd, void *buf, int len);
 int rx(struct connection *conn, enum conn_state next_state);
diff --git a/lib/net.c b/lib/net.c
index 96173d4..60b1dd7 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -31,12 +31,30 @@
 
 int conn_tx_off(struct connection *conn)
 {
-	return modify_event(conn->fd, EPOLLIN);
+	conn->events &= ~EPOLLOUT;
+
+	return modify_event(conn->fd, conn->events);
 }
 
 int conn_tx_on(struct connection *conn)
 {
-	return modify_event(conn->fd, EPOLLIN|EPOLLOUT);
+	conn->events |= EPOLLOUT;
+
+	return modify_event(conn->fd, conn->events);
+}
+
+int conn_rx_off(struct connection *conn)
+{
+	conn->events &= ~EPOLLIN;
+
+	return modify_event(conn->fd, conn->events);
+}
+
+int conn_rx_on(struct connection *conn)
+{
+	conn->events |= EPOLLIN;
+
+	return modify_event(conn->fd, conn->events);
 }
 
 int is_conn_dead(struct connection *conn)
diff --git a/sheep/group.c b/sheep/group.c
index 58ed81d..0f0b5f5 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -1304,6 +1304,7 @@ int create_cluster(int port, int64_t zone)
 	INIT_LIST_HEAD(&sys->outstanding_req_list);
 	INIT_LIST_HEAD(&sys->req_wait_for_obj_list);
 	INIT_LIST_HEAD(&sys->consistent_obj_list);
+	INIT_LIST_HEAD(&sys->blocking_conn_list);
 
 	INIT_LIST_HEAD(&sys->cpg_event_siblings);
 
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index 1d58e59..6114132 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -261,6 +261,7 @@ static struct request *alloc_request(struct client_info *ci, int data_length)
 	req->ci = ci;
 	client_incref(ci);
 	if (data_length) {
+		req->data_length = data_length;
 		req->data = valloc(data_length);
 		if (!req->data) {
 			free(req);
@@ -272,17 +273,19 @@ static struct request *alloc_request(struct client_info *ci, int data_length)
 	INIT_LIST_HEAD(&req->r_wlist);
 
 	sys->nr_outstanding_reqs++;
+	sys->outstanding_data_size += data_length;
 
 	return req;
 }
 
 static void free_request(struct request *req)
 {
+	sys->nr_outstanding_reqs--;
+	sys->outstanding_data_size -= req->data_length;
+
 	list_del(&req->r_siblings);
 	free(req->data);
 	free(req);
-
-	sys->nr_outstanding_reqs--;
 }
 
 static void req_done(struct request *req)
@@ -318,6 +321,13 @@ static void client_rx_handler(struct client_info *ci)
 	struct sd_req *hdr = &conn->rx_hdr;
 	struct request *req;
 
+	if (!ci->rx_req && sys->outstanding_data_size > MAX_OUTSTANDING_DATA_SIZE) {
+		dprintf("too many requests, %p\n", &ci->conn);
+		conn_rx_off(&ci->conn);
+		list_add(&ci->conn.blocking_siblings, &sys->blocking_conn_list);
+		return;
+	}
+
 	switch (conn->c_rx_state) {
 	case C_IO_HEADER:
 		ret = rx(conn, C_IO_DATA_INIT);
@@ -405,11 +415,19 @@ static void client_tx_handler(struct client_info *ci)
 {
 	int ret, opt;
 	struct sd_rsp *rsp = (struct sd_rsp *)&ci->conn.tx_hdr;
-
+	struct connection *conn, *n;
 again:
 	init_tx_hdr(ci);
 	if (!ci->tx_req) {
 		conn_tx_off(&ci->conn);
+		if (sys->outstanding_data_size < MAX_OUTSTANDING_DATA_SIZE) {
+			list_for_each_entry_safe(conn, n, &sys->blocking_conn_list,
+						 blocking_siblings) {
+				dprintf("rx on, %p\n", conn);
+				list_del(&conn->blocking_siblings);
+				conn_rx_on(conn);
+			}
+		}
 		return;
 	}
 
@@ -480,6 +498,7 @@ static struct client_info *create_client(int fd, struct cluster_info *cluster)
 		return NULL;
 
 	ci->conn.fd = fd;
+	ci->conn.events = EPOLLIN;
 	ci->refcnt = 1;
 
 	INIT_LIST_HEAD(&ci->reqs);
@@ -501,6 +520,9 @@ static void client_handler(int fd, int events, void *data)
 		client_tx_handler(ci);
 
 	if (is_conn_dead(&ci->conn)) {
+		if (!(ci->conn.events & EPOLLIN))
+			list_del(&ci->conn.blocking_siblings);
+
 		dprintf("closed a connection, %d\n", fd);
 		unregister_event(fd);
 		client_decref(ci);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 16f5672..a7a278a 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -75,6 +75,7 @@ struct request {
 	struct sd_op_template *op;
 
 	void *data;
+	unsigned int data_length;
 
 	struct client_info *ci;
 	struct list_head r_siblings;
@@ -101,6 +102,8 @@ struct data_object_bmap {
 	struct list_head list;
 };
 
+#define MAX_OUTSTANDING_DATA_SIZE (256 * 1024 * 1024)
+
 struct cluster_info {
 	struct cluster_driver *cdrv;
 
@@ -131,6 +134,7 @@ struct cluster_info {
 	struct list_head outstanding_req_list;
 	struct list_head req_wait_for_obj_list;
 	struct list_head consistent_obj_list;
+	struct list_head blocking_conn_list;
 
 	uint32_t nr_sobjs;
 
@@ -138,6 +142,7 @@ struct cluster_info {
 	struct cpg_event *cur_cevent;
 	int nr_outstanding_io;
 	int nr_outstanding_reqs;
+	unsigned int outstanding_data_size;
 
 	uint32_t recovered_epoch;
 
-- 
1.7.2.5




More information about the sheepdog mailing list