When a node leaves the sheepdog cluster, the other nodes try to remove the sockfd cache entry of that node (by calling sockfd_cache_del_node()). The current sockfd caching mechanism can leak fds in this sequence. Assume a worker thread is using a cached sockfd connected to the leaving node while the main thread calls sockfd_cache_del_node(). In that case, sockfd_cache_destroy() fails to destroy the sockfd cache entry (the debug message "Some victim still holds it" appears in the log). The sockfd caching mechanism assumes that the victims will destroy the entry later, after their requests fail, but this depends on timing: threads using cached fds never see a failure if their requests have already completed successfully, so the entry is never destroyed and the fds leak.

This patch fixes the leak by adding a new member, need_to_destroy, to struct sockfd_cache_entry. A thread sets this flag when the destroy has to be deferred; when the flag is set, the thread that puts a sockfd back must try to destroy the entry itself.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
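Note (not part of the patch): below is a minimal standalone C sketch of the deferred-destroy idea, for readers who want it without the surrounding sheepdog code. The names cache_entry, cache_destroy() and cache_put() are invented for illustration, it uses C11 atomics instead of the urcu uatomic helpers, and unlike the real code it is not protected by sockfd_cache.lock, so it only mimics the control flow, not the actual locking.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct cache_entry {
	atomic_int in_use;            /* slots currently held by workers */
	atomic_bool need_to_destroy;  /* set when destroy had to be deferred */
};

static void cache_free(struct cache_entry *e)
{
	printf("entry %p destroyed\n", (void *)e);
	free(e);
}

/* Node leaves: destroy now if idle, otherwise just mark the entry. */
static bool cache_destroy(struct cache_entry *e)
{
	if (atomic_load(&e->in_use) > 0) {
		atomic_store(&e->need_to_destroy, true);
		printf("entry busy, destroy deferred\n");
		return false;
	}
	cache_free(e);
	return true;
}

/* Worker puts its slot back; the last putter finishes a deferred destroy. */
static void cache_put(struct cache_entry *e)
{
	if (atomic_fetch_sub(&e->in_use, 1) == 1 &&
	    atomic_load(&e->need_to_destroy))
		cache_free(e);
}

int main(void)
{
	struct cache_entry *e = malloc(sizeof(*e));

	atomic_init(&e->in_use, 1);              /* one worker holds a cached fd */
	atomic_init(&e->need_to_destroy, false);
	cache_destroy(e);                        /* node leaves: destroy deferred */
	cache_put(e);                            /* worker releases: entry freed  */
	return 0;
}

In the patch itself the same effect is obtained by taking the write lock in the put path and calling sockfd_cache_destroy_nolock() there, so the search, the in_use update and the deferred destroy stay serialized with the rest of the cache operations.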
 lib/sockfd_cache.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/lib/sockfd_cache.c b/lib/sockfd_cache.c
index ec06091..3a25bbd 100644
--- a/lib/sockfd_cache.c
+++ b/lib/sockfd_cache.c
@@ -70,6 +70,8 @@ struct sockfd_cache_entry {
 	struct rb_node rb;
 	struct node_id nid;
 	struct sockfd_cache_fd *fds;
+
+	uatomic_bool need_to_destroy;
 };
 
 static int sockfd_cache_cmp(const struct sockfd_cache_entry *a,
@@ -159,32 +161,41 @@ static void free_cache_entry(struct sockfd_cache_entry *entry)
  * the victim node will finally find itself talking to a dead node and call
  * sockfd_cache_del() to delete this node from the cache.
  */
-static bool sockfd_cache_destroy(const struct node_id *nid)
+static bool sockfd_cache_destroy_nolock(const struct node_id *nid)
 {
 	struct sockfd_cache_entry *entry;
 
-	sd_write_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (!entry) {
 		sd_debug("It is already destroyed");
-		goto false_out;
+		return false;
 	}
 
 	if (!slots_all_free(entry)) {
 		sd_debug("Some victim still holds it");
-		goto false_out;
+		/* defer destroy */
+		uatomic_set_true(&entry->need_to_destroy);
+
+		return false;
 	}
 
 	rb_erase(&entry->rb, &sockfd_cache.root);
-	sd_rw_unlock(&sockfd_cache.lock);
 
 	destroy_all_slots(entry);
 	free_cache_entry(entry);
 
 	return true;
-false_out:
+}
+
+static bool sockfd_cache_destroy(const struct node_id *nid)
+{
+	bool result;
+
+	sd_write_lock(&sockfd_cache.lock);
+	result = sockfd_cache_destroy_nolock(nid);
 	sd_rw_unlock(&sockfd_cache.lock);
-	return false;
+
+	return result;
 }
 
 static void sockfd_cache_add_nolock(const struct node_id *nid)
@@ -371,10 +382,16 @@ static void sockfd_cache_put_long(const struct node_id *nid, int idx)
 
 	sd_debug("%s idx %d", addr_to_str(addr, port), idx);
 
-	sd_read_lock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
-	if (entry)
+	if (entry) {
 		uatomic_set_false(&entry->fds[idx].in_use);
+
+		if (uatomic_is_true(&entry->need_to_destroy)) {
+			sd_debug("deferred destroy of cache fd is required");
+			sockfd_cache_destroy_nolock(nid);
+		}
+	}
 	sd_rw_unlock(&sockfd_cache.lock);
 }
 
@@ -393,6 +410,11 @@ static void sockfd_cache_close(const struct node_id *nid, int idx)
 		close(entry->fds[idx].fd);
 		entry->fds[idx].fd = -1;
 		uatomic_set_false(&entry->fds[idx].in_use);
+
+		if (uatomic_is_true(&entry->need_to_destroy)) {
+			sd_debug("deferred destroy of cache fd is required");
+			sockfd_cache_destroy_nolock(nid);
+		}
 	}
 	sd_rw_unlock(&sockfd_cache.lock);
 }
-- 
1.7.10.4