[sheepdog] [PATCH v2] sheep: prevent starvation of "node kill" requests
Hitoshi Mitake
mitake.hitoshi at lab.ntt.co.jp
Fri Feb 21 03:35:09 CET 2014
Under heavy load, the sheep process continues to execute event_loop()
even after admins execute "dog node kill", because
sys->nr_outstanding_reqs rarely reaches 0. In our case, the actual
exit of the sheep process took from about 10 minutes to 1 hour.
This patch prevents starvation of the stop requests by
unregistering the events of the listening fds. By unregistering them,
sheep can stop generating new requests.
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
v2: update the commit log. "cluster shutdown" could be completed quickly; the
previous commit log was wrong.
sheep/ops.c | 18 ++++++++++++------
sheep/request.c | 23 +++++++++++++++++++++++
sheep/sheep_priv.h | 1 +
3 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/sheep/ops.c b/sheep/ops.c
index bd663eb..6451563 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -304,12 +304,17 @@ static int cluster_shutdown(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
sys->cinfo.status = SD_STATUS_SHUTDOWN;
- if (!node_in_recovery() && set_cluster_shutdown(true) != SD_RES_SUCCESS)
- /*
- * It's okay we failed to set 'shutdown', just start recovery
- * after restart blindly.
- */
- sd_err("failed to set cluster as shutdown");
+ if (!node_in_recovery()) {
+ unregister_listening_fds();
+
+ if (set_cluster_shutdown(true) != SD_RES_SUCCESS)
+ /*
+ * It's okay we failed to set 'shutdown', just start
+ * recovery after restart blindly.
+ */
+ sd_err("failed to set cluster as shutdown");
+ }
+
return SD_RES_SUCCESS;
}
@@ -910,6 +915,7 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
sys->cinfo.status = SD_STATUS_KILLED;
+ unregister_listening_fds();
return SD_RES_SUCCESS;
}
diff --git a/sheep/request.c b/sheep/request.c
index 5867fa1..4aa928e 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -996,11 +996,34 @@ static void listen_handler(int listen_fd, int events, void *data)
sd_debug("accepted a new connection: %d", fd);
}
+static LIST_HEAD(listening_fd_list);
+
+struct listening_fd {
+ int fd;
+ struct list_node list;
+};
+
static int create_listen_port_fn(int fd, void *data)
{
+ struct listening_fd *new_fd;
+
+ new_fd = xzalloc(sizeof(*new_fd));
+ new_fd->fd = fd;
+ list_add_tail(&new_fd->list, &listening_fd_list);
+
return register_event(fd, listen_handler, data);
}
+void unregister_listening_fds(void)
+{
+ struct listening_fd *fd;
+
+ list_for_each_entry(fd, &listening_fd_list, list) {
+ sd_debug("unregistering fd: %d", fd->fd);
+ unregister_event(fd->fd);
+ }
+}
+
int create_listen_port(const char *bindaddr, int port)
{
static bool is_inet_socket = true;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 108318e..b7cb937 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -283,6 +283,7 @@ static inline bool is_aligned_to_pagesize(void *p)
int create_listen_port(const char *bindaddr, int port);
int init_unix_domain_socket(const char *dir);
+void unregister_listening_fds(void);
int init_store_driver(bool is_gateway);
int init_global_pathnames(const char *d, char *);
--
1.7.10.4
More information about the sheepdog
mailing list