Under heavy load, the sheep process continues to execute event_loop() even if admins execute "dog node kill" or "dog cluster shutdown", because sys->nr_outstanding_reqs rarely reaches 0. In our case, the actual exit of the sheep process took from about 10 minutes to 1 hour to complete. This patch prevents starvation of the stop requests by unregistering the events of the listening fds. With these events unregistered, sheep can stop generating new requests. Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp> --- sheep/ops.c | 18 ++++++++++++------ sheep/request.c | 23 +++++++++++++++++++++++ sheep/sheep_priv.h | 1 + 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/sheep/ops.c b/sheep/ops.c index 481789f..c42ad81 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -304,12 +304,17 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp, void *data) { sys->cinfo.status = SD_STATUS_SHUTDOWN; - if (!node_in_recovery() && set_cluster_shutdown(true) != SD_RES_SUCCESS) - /* - * It's okay we failed to set 'shutdown', just start recovery - * after restart blindly. - */ - sd_err("failed to set cluster as shutdown"); + if (!node_in_recovery()) { + unregister_listening_fds(); + + if (set_cluster_shutdown(true) != SD_RES_SUCCESS) + /* + * It's okay we failed to set 'shutdown', just start + * recovery after restart blindly. 
+ */ + sd_err("failed to set cluster as shutdown"); + } + return SD_RES_SUCCESS; } @@ -910,6 +915,7 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp, void *data) { sys->cinfo.status = SD_STATUS_KILLED; + unregister_listening_fds(); return SD_RES_SUCCESS; } diff --git a/sheep/request.c b/sheep/request.c index fbaf645..8b35be8 100644 --- a/sheep/request.c +++ b/sheep/request.c @@ -995,11 +995,34 @@ static void listen_handler(int listen_fd, int events, void *data) sd_debug("accepted a new connection: %d", fd); } +static LIST_HEAD(listening_fd_list); + +struct listening_fd { + int fd; + struct list_node list; +}; + static int create_listen_port_fn(int fd, void *data) { + struct listening_fd *new_fd; + + new_fd = xzalloc(sizeof(*new_fd)); + new_fd->fd = fd; + list_add_tail(&new_fd->list, &listening_fd_list); + return register_event(fd, listen_handler, data); } +void unregister_listening_fds(void) +{ + struct listening_fd *fd; + + list_for_each_entry(fd, &listening_fd_list, list) { + sd_debug("unregistering fd: %d", fd->fd); + unregister_event(fd->fd); + } +} + int create_listen_port(const char *bindaddr, int port) { static bool is_inet_socket = true; diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index f59dc06..d7a9bbd 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -283,6 +283,7 @@ static inline bool is_aligned_to_pagesize(void *p) int create_listen_port(const char *bindaddr, int port); int init_unix_domain_socket(const char *dir); +void unregister_listening_fds(void); int init_store_driver(bool is_gateway); int init_global_pathnames(const char *d, char *); -- 1.7.10.4 |