[Sheepdog] [PATCH] sheep: block I/O requests under high memory pressure
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Wed Nov 2 08:03:03 CET 2011
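This fixes an out-of-memory (OOM) bug that occurs when there are many outstanding I/O requests. Sheep now tracks the total data size of outstanding requests and, while it exceeds MAX_OUTSTANDING_DATA_SIZE (256 MB), stops reading new requests from client connections; reading resumes once the total drops below the limit.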
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
include/net.h | 5 +++++
lib/net.c | 22 ++++++++++++++++++++--
sheep/group.c | 1 +
sheep/sdnet.c | 28 +++++++++++++++++++++++++---
sheep/sheep_priv.h | 5 +++++
5 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/include/net.h b/include/net.h
index 6a9a787..9e51fea 100644
--- a/include/net.h
+++ b/include/net.h
@@ -13,6 +13,7 @@ enum conn_state {
struct connection {
int fd;
+ unsigned int events;
enum conn_state c_rx_state;
int rx_length;
@@ -23,10 +24,14 @@ struct connection {
int tx_length;
void *tx_buf;
struct sd_rsp tx_hdr;
+
+ struct list_head blocking_siblings;
};
int conn_tx_off(struct connection *conn);
int conn_tx_on(struct connection *conn);
+int conn_rx_off(struct connection *conn);
+int conn_rx_on(struct connection *conn);
int is_conn_dead(struct connection *conn);
int do_read(int sockfd, void *buf, int len);
int rx(struct connection *conn, enum conn_state next_state);
diff --git a/lib/net.c b/lib/net.c
index 96173d4..60b1dd7 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -31,12 +31,30 @@
int conn_tx_off(struct connection *conn)
{
- return modify_event(conn->fd, EPOLLIN);
+ conn->events &= ~EPOLLOUT;
+
+ return modify_event(conn->fd, conn->events);
}
int conn_tx_on(struct connection *conn)
{
- return modify_event(conn->fd, EPOLLIN|EPOLLOUT);
+ conn->events |= EPOLLOUT;
+
+ return modify_event(conn->fd, conn->events);
+}
+
+int conn_rx_off(struct connection *conn)
+{
+ conn->events &= ~EPOLLIN;
+
+ return modify_event(conn->fd, conn->events);
+}
+
+int conn_rx_on(struct connection *conn)
+{
+ conn->events |= EPOLLIN;
+
+ return modify_event(conn->fd, conn->events);
}
int is_conn_dead(struct connection *conn)
diff --git a/sheep/group.c b/sheep/group.c
index 58ed81d..0f0b5f5 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -1304,6 +1304,7 @@ int create_cluster(int port, int64_t zone)
INIT_LIST_HEAD(&sys->outstanding_req_list);
INIT_LIST_HEAD(&sys->req_wait_for_obj_list);
INIT_LIST_HEAD(&sys->consistent_obj_list);
+ INIT_LIST_HEAD(&sys->blocking_conn_list);
INIT_LIST_HEAD(&sys->cpg_event_siblings);
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index 1d58e59..6114132 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -261,6 +261,7 @@ static struct request *alloc_request(struct client_info *ci, int data_length)
req->ci = ci;
client_incref(ci);
if (data_length) {
+ req->data_length = data_length;
req->data = valloc(data_length);
if (!req->data) {
free(req);
@@ -272,17 +273,19 @@ static struct request *alloc_request(struct client_info *ci, int data_length)
INIT_LIST_HEAD(&req->r_wlist);
sys->nr_outstanding_reqs++;
+ sys->outstanding_data_size += data_length;
return req;
}
static void free_request(struct request *req)
{
+ sys->nr_outstanding_reqs--;
+ sys->outstanding_data_size -= req->data_length;
+
list_del(&req->r_siblings);
free(req->data);
free(req);
-
- sys->nr_outstanding_reqs--;
}
static void req_done(struct request *req)
@@ -318,6 +321,13 @@ static void client_rx_handler(struct client_info *ci)
struct sd_req *hdr = &conn->rx_hdr;
struct request *req;
+ if (!ci->rx_req && sys->outstanding_data_size > MAX_OUTSTANDING_DATA_SIZE) {
+ dprintf("too many requests, %p\n", &ci->conn);
+ conn_rx_off(&ci->conn);
+ list_add(&ci->conn.blocking_siblings, &sys->blocking_conn_list);
+ return;
+ }
+
switch (conn->c_rx_state) {
case C_IO_HEADER:
ret = rx(conn, C_IO_DATA_INIT);
@@ -405,11 +415,19 @@ static void client_tx_handler(struct client_info *ci)
{
int ret, opt;
struct sd_rsp *rsp = (struct sd_rsp *)&ci->conn.tx_hdr;
-
+ struct connection *conn, *n;
again:
init_tx_hdr(ci);
if (!ci->tx_req) {
conn_tx_off(&ci->conn);
+ if (sys->outstanding_data_size < MAX_OUTSTANDING_DATA_SIZE) {
+ list_for_each_entry_safe(conn, n, &sys->blocking_conn_list,
+ blocking_siblings) {
+ dprintf("rx on, %p\n", conn);
+ list_del(&conn->blocking_siblings);
+ conn_rx_on(conn);
+ }
+ }
return;
}
@@ -480,6 +498,7 @@ static struct client_info *create_client(int fd, struct cluster_info *cluster)
return NULL;
ci->conn.fd = fd;
+ ci->conn.events = EPOLLIN;
ci->refcnt = 1;
INIT_LIST_HEAD(&ci->reqs);
@@ -501,6 +520,9 @@ static void client_handler(int fd, int events, void *data)
client_tx_handler(ci);
if (is_conn_dead(&ci->conn)) {
+ if (!(ci->conn.events & EPOLLIN))
+ list_del(&ci->conn.blocking_siblings);
+
dprintf("closed a connection, %d\n", fd);
unregister_event(fd);
client_decref(ci);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 16f5672..a7a278a 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -75,6 +75,7 @@ struct request {
struct sd_op_template *op;
void *data;
+ unsigned int data_length;
struct client_info *ci;
struct list_head r_siblings;
@@ -101,6 +102,8 @@ struct data_object_bmap {
struct list_head list;
};
+#define MAX_OUTSTANDING_DATA_SIZE (256 * 1024 * 1024)
+
struct cluster_info {
struct cluster_driver *cdrv;
@@ -131,6 +134,7 @@ struct cluster_info {
struct list_head outstanding_req_list;
struct list_head req_wait_for_obj_list;
struct list_head consistent_obj_list;
+ struct list_head blocking_conn_list;
uint32_t nr_sobjs;
@@ -138,6 +142,7 @@ struct cluster_info {
struct cpg_event *cur_cevent;
int nr_outstanding_io;
int nr_outstanding_reqs;
+ unsigned int outstanding_data_size;
uint32_t recovered_epoch;
--
1.7.2.5
More information about the sheepdog mailing list