This fixes an OOM bug which occurs when there are many I/O requests. Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> --- include/net.h | 5 +++++ lib/net.c | 22 ++++++++++++++++++++-- sheep/group.c | 1 + sheep/sdnet.c | 28 +++++++++++++++++++++++++--- sheep/sheep_priv.h | 5 +++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/include/net.h b/include/net.h index 6a9a787..9e51fea 100644 --- a/include/net.h +++ b/include/net.h @@ -13,6 +13,7 @@ enum conn_state { struct connection { int fd; + unsigned int events; enum conn_state c_rx_state; int rx_length; @@ -23,10 +24,14 @@ struct connection { int tx_length; void *tx_buf; struct sd_rsp tx_hdr; + + struct list_head blocking_siblings; }; int conn_tx_off(struct connection *conn); int conn_tx_on(struct connection *conn); +int conn_rx_off(struct connection *conn); +int conn_rx_on(struct connection *conn); int is_conn_dead(struct connection *conn); int do_read(int sockfd, void *buf, int len); int rx(struct connection *conn, enum conn_state next_state); diff --git a/lib/net.c b/lib/net.c index 96173d4..60b1dd7 100644 --- a/lib/net.c +++ b/lib/net.c @@ -31,12 +31,30 @@ int conn_tx_off(struct connection *conn) { - return modify_event(conn->fd, EPOLLIN); + conn->events &= ~EPOLLOUT; + + return modify_event(conn->fd, conn->events); } int conn_tx_on(struct connection *conn) { - return modify_event(conn->fd, EPOLLIN|EPOLLOUT); + conn->events |= EPOLLOUT; + + return modify_event(conn->fd, conn->events); +} + +int conn_rx_off(struct connection *conn) +{ + conn->events &= ~EPOLLIN; + + return modify_event(conn->fd, conn->events); +} + +int conn_rx_on(struct connection *conn) +{ + conn->events |= EPOLLIN; + + return modify_event(conn->fd, conn->events); } int is_conn_dead(struct connection *conn) diff --git a/sheep/group.c b/sheep/group.c index 58ed81d..0f0b5f5 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -1304,6 +1304,7 @@ int create_cluster(int port, int64_t zone) 
INIT_LIST_HEAD(&sys->outstanding_req_list); INIT_LIST_HEAD(&sys->req_wait_for_obj_list); INIT_LIST_HEAD(&sys->consistent_obj_list); + INIT_LIST_HEAD(&sys->blocking_conn_list); INIT_LIST_HEAD(&sys->cpg_event_siblings); diff --git a/sheep/sdnet.c b/sheep/sdnet.c index 1d58e59..6114132 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -261,6 +261,7 @@ static struct request *alloc_request(struct client_info *ci, int data_length) req->ci = ci; client_incref(ci); if (data_length) { + req->data_length = data_length; req->data = valloc(data_length); if (!req->data) { free(req); @@ -272,17 +273,19 @@ static struct request *alloc_request(struct client_info *ci, int data_length) INIT_LIST_HEAD(&req->r_wlist); sys->nr_outstanding_reqs++; + sys->outstanding_data_size += data_length; return req; } static void free_request(struct request *req) { + sys->nr_outstanding_reqs--; + sys->outstanding_data_size -= req->data_length; + list_del(&req->r_siblings); free(req->data); free(req); - - sys->nr_outstanding_reqs--; } static void req_done(struct request *req) @@ -318,6 +321,13 @@ static void client_rx_handler(struct client_info *ci) struct sd_req *hdr = &conn->rx_hdr; struct request *req; + if (!ci->rx_req && sys->outstanding_data_size > MAX_OUTSTANDING_DATA_SIZE) { + dprintf("too many requests, %p\n", &ci->conn); + conn_rx_off(&ci->conn); + list_add(&ci->conn.blocking_siblings, &sys->blocking_conn_list); + return; + } + switch (conn->c_rx_state) { case C_IO_HEADER: ret = rx(conn, C_IO_DATA_INIT); @@ -405,11 +415,19 @@ static void client_tx_handler(struct client_info *ci) { int ret, opt; struct sd_rsp *rsp = (struct sd_rsp *)&ci->conn.tx_hdr; - + struct connection *conn, *n; again: init_tx_hdr(ci); if (!ci->tx_req) { conn_tx_off(&ci->conn); + if (sys->outstanding_data_size < MAX_OUTSTANDING_DATA_SIZE) { + list_for_each_entry_safe(conn, n, &sys->blocking_conn_list, + blocking_siblings) { + dprintf("rx on, %p\n", conn); + list_del(&conn->blocking_siblings); + conn_rx_on(conn); + } + } 
return; } @@ -480,6 +498,7 @@ static struct client_info *create_client(int fd, struct cluster_info *cluster) return NULL; ci->conn.fd = fd; + ci->conn.events = EPOLLIN; ci->refcnt = 1; INIT_LIST_HEAD(&ci->reqs); @@ -501,6 +520,9 @@ static void client_handler(int fd, int events, void *data) client_tx_handler(ci); if (is_conn_dead(&ci->conn)) { + if (!(ci->conn.events & EPOLLIN)) + list_del(&ci->conn.blocking_siblings); + dprintf("closed a connection, %d\n", fd); unregister_event(fd); client_decref(ci); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 16f5672..a7a278a 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -75,6 +75,7 @@ struct request { struct sd_op_template *op; void *data; + unsigned int data_length; struct client_info *ci; struct list_head r_siblings; @@ -101,6 +102,8 @@ struct data_object_bmap { struct list_head list; }; +#define MAX_OUTSTANDING_DATA_SIZE (256 * 1024 * 1024) + struct cluster_info { struct cluster_driver *cdrv; @@ -131,6 +134,7 @@ struct cluster_info { struct list_head outstanding_req_list; struct list_head req_wait_for_obj_list; struct list_head consistent_obj_list; + struct list_head blocking_conn_list; uint32_t nr_sobjs; @@ -138,6 +142,7 @@ struct cluster_info { struct cpg_event *cur_cevent; int nr_outstanding_io; int nr_outstanding_reqs; + unsigned int outstanding_data_size; uint32_t recovered_epoch; -- 1.7.2.5