[sheepdog] [PATCH] lib: remove a fd leak bug in the sockfd cache
Hitoshi Mitake
mitake.hitoshi at gmail.com
Fri Feb 7 06:47:07 CET 2014
At Tue, 4 Feb 2014 16:25:57 +0900,
Hitoshi Mitake wrote:
>
> When a node leaves from sheepdog cluster, other nodes try to remove a
> sockfd cache entry of the node (calling
> sockfd_cache_del_node()). Current sockfd caching mechanism would have
> a possibility of fd leak in this sequence.
>
> Assume there is a worker thread which is using a cached sockfd
> connected to the leaving node during the above sequence (main
> thread is calling sockfd_cache_del_node()). In such a case,
> sockfd_cache_destroy() fails to destroy the sockfd cache entry (the
> debug message "Some victim still holds it" can be seen in a log).
>
> Sockfd caching mechanism assumes that the victims can destroy the
> entry after their requests fail. But it depends on timing. Other
> threads which use cached fds don't face fail if their requests are
> already executed correctly. And the fd leak problem would happen.
>
> This patch solves this fd leak problem by adding a new member
> need_to_destroy to struct sockfd_cache_entry. Threads turn on this
> variable true when destroy should be deferred. If the variable is
> true, other threads must try to destroy an entry when they put
> sockfds.
>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
> lib/sockfd_cache.c | 40 +++++++++++++++++++++++++++++++---------
> 1 file changed, 31 insertions(+), 9 deletions(-)
I found a bug in this version. Please ignore this one. I'll send v2
later.
Thanks,
Hitoshi
>
> diff --git a/lib/sockfd_cache.c b/lib/sockfd_cache.c
> index ec06091..3a25bbd 100644
> --- a/lib/sockfd_cache.c
> +++ b/lib/sockfd_cache.c
> @@ -70,6 +70,8 @@ struct sockfd_cache_entry {
> struct rb_node rb;
> struct node_id nid;
> struct sockfd_cache_fd *fds;
> +
> + uatomic_bool need_to_destroy;
> };
>
> static int sockfd_cache_cmp(const struct sockfd_cache_entry *a,
> @@ -159,32 +161,41 @@ static void free_cache_entry(struct sockfd_cache_entry *entry)
> * the victim node will finally find itself talking to a dead node and call
> * sockfd_cache_del() to delete this node from the cache.
> */
> -static bool sockfd_cache_destroy(const struct node_id *nid)
> +static bool sockfd_cache_destroy_nolock(const struct node_id *nid)
> {
> struct sockfd_cache_entry *entry;
>
> - sd_write_lock(&sockfd_cache.lock);
> entry = sockfd_cache_search(nid);
> if (!entry) {
> sd_debug("It is already destroyed");
> - goto false_out;
> + return false;
> }
>
> if (!slots_all_free(entry)) {
> sd_debug("Some victim still holds it");
> - goto false_out;
> + /* defer destroy */
> + uatomic_set_true(&entry->need_to_destroy);
> +
> + return false;
> }
>
> rb_erase(&entry->rb, &sockfd_cache.root);
> - sd_rw_unlock(&sockfd_cache.lock);
>
> destroy_all_slots(entry);
> free_cache_entry(entry);
>
> return true;
> -false_out:
> +}
> +
> +static bool sockfd_cache_destroy(const struct node_id *nid)
> +{
> + bool result;
> +
> + sd_write_lock(&sockfd_cache.lock);
> + result = sockfd_cache_destroy_nolock(nid);
> sd_rw_unlock(&sockfd_cache.lock);
> - return false;
> +
> + return result;
> }
>
> static void sockfd_cache_add_nolock(const struct node_id *nid)
> @@ -371,10 +382,16 @@ static void sockfd_cache_put_long(const struct node_id *nid, int idx)
>
> sd_debug("%s idx %d", addr_to_str(addr, port), idx);
>
> - sd_read_lock(&sockfd_cache.lock);
> + sd_write_lock(&sockfd_cache.lock);
> entry = sockfd_cache_search(nid);
> - if (entry)
> + if (entry) {
> uatomic_set_false(&entry->fds[idx].in_use);
> +
> + if (uatomic_is_true(&entry->need_to_destroy)) {
> + sd_debug("deferred destroy of cache fd is required");
> + sockfd_cache_destroy_nolock(nid);
> + }
> + }
> sd_rw_unlock(&sockfd_cache.lock);
> }
>
> @@ -393,6 +410,11 @@ static void sockfd_cache_close(const struct node_id *nid, int idx)
> close(entry->fds[idx].fd);
> entry->fds[idx].fd = -1;
> uatomic_set_false(&entry->fds[idx].in_use);
> +
> + if (uatomic_is_true(&entry->need_to_destroy)) {
> + sd_debug("deferred destroy of cache fd is required");
> + sockfd_cache_destroy_nolock(nid);
> + }
> }
> sd_rw_unlock(&sockfd_cache.lock);
> }
> --
> 1.7.10.4
>
More information about the sheepdog
mailing list