[sheepdog] [PATCH v2] sheep: prevent starvation of "node kill" requests

Liu Yuan namei.unix at gmail.com
Fri Feb 21 06:21:21 CET 2014


On Fri, Feb 21, 2014 at 11:35:09AM +0900, Hitoshi Mitake wrote:
> Under heavy load, the sheep process continues to execute event_loop()
> even if admins execute "dog node kill", because
> sys->nr_outstanding_reqs rarely reaches 0. In our case, the actual
> exit of the sheep process took anywhere from 10 minutes to 1 hour.
> 
> This patch prevents the starvation of stop requests by
> unregistering the events of the listening fds. Once they are
> unregistered, sheep can stop generating new requests.
> 
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
> 
> v2: Update the commit log. "cluster shutdown" could be completed quickly;
>     the previous commit log was wrong.
> 
>  sheep/ops.c        |   18 ++++++++++++------
>  sheep/request.c    |   23 +++++++++++++++++++++++
>  sheep/sheep_priv.h |    1 +
>  3 files changed, 36 insertions(+), 6 deletions(-)
> 
> diff --git a/sheep/ops.c b/sheep/ops.c
> index bd663eb..6451563 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -304,12 +304,17 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
>  			    void *data)
>  {
>  	sys->cinfo.status = SD_STATUS_SHUTDOWN;
> -	if (!node_in_recovery() && set_cluster_shutdown(true) != SD_RES_SUCCESS)
> -		/*
> -		 * It's okay we failed to set 'shutdown', just start recovery
> -		 * after restart blindly.
> -		 */
> -		sd_err("failed to set cluster as shutdown");
> +	if (!node_in_recovery()) {
> +		unregister_listening_fds();
> +
> +		if (set_cluster_shutdown(true) != SD_RES_SUCCESS)
> +			/*
> +			 * It's okay we failed to set 'shutdown', just start
> +			 * recovery after restart blindly.
> +			 */
> +			sd_err("failed to set cluster as shutdown");
> +	}
> +
>  	return SD_RES_SUCCESS;
>  }
>  
> @@ -910,6 +915,7 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
>  			   void *data)
>  {
>  	sys->cinfo.status = SD_STATUS_KILLED;
> +	unregister_listening_fds();
>  
>  	return SD_RES_SUCCESS;
>  }
> diff --git a/sheep/request.c b/sheep/request.c
> index 5867fa1..4aa928e 100644
> --- a/sheep/request.c
> +++ b/sheep/request.c
> @@ -996,11 +996,34 @@ static void listen_handler(int listen_fd, int events, void *data)
>  	sd_debug("accepted a new connection: %d", fd);
>  }
>  
> +static LIST_HEAD(listening_fd_list);
> +
> +struct listening_fd {
> +	int fd;
> +	struct list_node list;
> +};
> +
>  static int create_listen_port_fn(int fd, void *data)
>  {
> +	struct listening_fd *new_fd;
> +
> +	new_fd = xzalloc(sizeof(*new_fd));
> +	new_fd->fd = fd;
> +	list_add_tail(&new_fd->list, &listening_fd_list);
> +
>  	return register_event(fd, listen_handler, data);
>  }
>  
> +void unregister_listening_fds(void)
> +{
> +	struct listening_fd *fd;
> +
> +	list_for_each_entry(fd, &listening_fd_list, list) {
> +		sd_debug("unregistering fd: %d", fd->fd);
> +		unregister_event(fd->fd);
> +	}
> +}
> +
>  int create_listen_port(const char *bindaddr, int port)
>  {
>  	static bool is_inet_socket = true;
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 108318e..b7cb937 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -283,6 +283,7 @@ static inline bool is_aligned_to_pagesize(void *p)
>  
>  int create_listen_port(const char *bindaddr, int port);
>  int init_unix_domain_socket(const char *dir);
> +void unregister_listening_fds(void);
>  
>  int init_store_driver(bool is_gateway);
>  int init_global_pathnames(const char *d, char *);
> -- 
> 1.7.10.4
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog

Applied, thanks.

Yuan



More information about the sheepdog mailing list