[sheepdog] [PATCH v2] sheep: prevent starvation of "node kill" requests
Liu Yuan
namei.unix at gmail.com
Fri Feb 21 06:21:21 CET 2014
On Fri, Feb 21, 2014 at 11:35:09AM +0900, Hitoshi Mitake wrote:
> Under heavy load, the sheep process keeps executing event_loop()
> even after an admin runs "dog node kill", because
> sys->nr_outstanding_reqs rarely reaches 0. In our case, it took
> anywhere from 10 minutes to 1 hour for the sheep process to exit.
>
> This patch prevents starvation of the stop requests by
> unregistering the events of the listening fds. Once they are
> unregistered, sheep can stop generating new requests.
>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
>
> v2: update the commit log. "cluster shutdown" could be completed quickly;
> the previous commit log was wrong.
>
> sheep/ops.c | 18 ++++++++++++------
> sheep/request.c | 23 +++++++++++++++++++++++
> sheep/sheep_priv.h | 1 +
> 3 files changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/sheep/ops.c b/sheep/ops.c
> index bd663eb..6451563 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -304,12 +304,17 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> sys->cinfo.status = SD_STATUS_SHUTDOWN;
> - if (!node_in_recovery() && set_cluster_shutdown(true) != SD_RES_SUCCESS)
> - /*
> - * It's okay we failed to set 'shutdown', just start recovery
> - * after restart blindly.
> - */
> - sd_err("failed to set cluster as shutdown");
> + if (!node_in_recovery()) {
> + unregister_listening_fds();
> +
> + if (set_cluster_shutdown(true) != SD_RES_SUCCESS)
> + /*
> + * It's okay we failed to set 'shutdown', just start
> + * recovery after restart blindly.
> + */
> + sd_err("failed to set cluster as shutdown");
> + }
> +
> return SD_RES_SUCCESS;
> }
>
> @@ -910,6 +915,7 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> sys->cinfo.status = SD_STATUS_KILLED;
> + unregister_listening_fds();
>
> return SD_RES_SUCCESS;
> }
> diff --git a/sheep/request.c b/sheep/request.c
> index 5867fa1..4aa928e 100644
> --- a/sheep/request.c
> +++ b/sheep/request.c
> @@ -996,11 +996,34 @@ static void listen_handler(int listen_fd, int events, void *data)
> sd_debug("accepted a new connection: %d", fd);
> }
>
> +static LIST_HEAD(listening_fd_list);
> +
> +struct listening_fd {
> + int fd;
> + struct list_node list;
> +};
> +
> static int create_listen_port_fn(int fd, void *data)
> {
> + struct listening_fd *new_fd;
> +
> + new_fd = xzalloc(sizeof(*new_fd));
> + new_fd->fd = fd;
> + list_add_tail(&new_fd->list, &listening_fd_list);
> +
> return register_event(fd, listen_handler, data);
> }
>
> +void unregister_listening_fds(void)
> +{
> + struct listening_fd *fd;
> +
> + list_for_each_entry(fd, &listening_fd_list, list) {
> + sd_debug("unregistering fd: %d", fd->fd);
> + unregister_event(fd->fd);
> + }
> +}
> +
> int create_listen_port(const char *bindaddr, int port)
> {
> static bool is_inet_socket = true;
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 108318e..b7cb937 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -283,6 +283,7 @@ static inline bool is_aligned_to_pagesize(void *p)
>
> int create_listen_port(const char *bindaddr, int port);
> int init_unix_domain_socket(const char *dir);
> +void unregister_listening_fds(void);
>
> int init_store_driver(bool is_gateway);
> int init_global_pathnames(const char *d, char *);
> --
> 1.7.10.4
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
Applied, thanks.
Yuan
More information about the sheepdog
mailing list