When a node leaves the sheepdog cluster, the other nodes try to remove the sockfd cache entry of that node (by calling sockfd_cache_del_node()). The current sockfd caching mechanism can leak fds in this sequence. Assume a worker thread is using a cached sockfd connected to the leaving node while the main thread calls sockfd_cache_del_node(). In that case, sockfd_cache_destroy() fails to destroy the sockfd cache entry (the debug message "Some victim still holds it" appears in the log). The sockfd caching mechanism assumes that the victims will destroy the entry later, after their requests fail, but this depends on timing: threads using cached fds never see a failure if their requests have already completed successfully, so the entry is never destroyed and the fds leak.

This patch fixes the leak by adding a new member, need_to_destroy, to struct sockfd_cache_entry. A thread sets this flag when the destroy has to be deferred; when the flag is set, the thread that puts a sockfd back must try to destroy the entry itself.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
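Note (not part of the patch): below is a minimal standalone C sketch of the deferred-destroy idea, for readers who want it without the surrounding sheepdog code. The names cache_entry, cache_destroy() and cache_put() are invented for illustration, it uses C11 atomics instead of the urcu uatomic helpers, and unlike the real code it is not protected by sockfd_cache.lock, so it only mimics the control flow, not the actual locking.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct cache_entry {
	atomic_int in_use;            /* slots currently held by workers */
	atomic_bool need_to_destroy;  /* set when destroy had to be deferred */
};

static void cache_free(struct cache_entry *e)
{
	printf("entry %p destroyed\n", (void *)e);
	free(e);
}

/* Node leaves: destroy now if idle, otherwise just mark the entry. */
static bool cache_destroy(struct cache_entry *e)
{
	if (atomic_load(&e->in_use) > 0) {
		atomic_store(&e->need_to_destroy, true);
		printf("entry busy, destroy deferred\n");
		return false;
	}
	cache_free(e);
	return true;
}

/* Worker puts its slot back; the last putter finishes a deferred destroy. */
static void cache_put(struct cache_entry *e)
{
	if (atomic_fetch_sub(&e->in_use, 1) == 1 &&
	    atomic_load(&e->need_to_destroy))
		cache_free(e);
}

int main(void)
{
	struct cache_entry *e = malloc(sizeof(*e));

	atomic_init(&e->in_use, 1);              /* one worker holds a cached fd */
	atomic_init(&e->need_to_destroy, false);
	cache_destroy(e);                        /* node leaves: destroy deferred */
	cache_put(e);                            /* worker releases: entry freed  */
	return 0;
}

In the patch itself the same effect is obtained by taking the write lock in the put path and calling sockfd_cache_destroy_nolock() there, so the search, the in_use update and the deferred destroy stay serialized with the rest of the cache operations.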
 lib/sockfd_cache.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/lib/sockfd_cache.c b/lib/sockfd_cache.c
index ec06091..3a25bbd 100644
--- a/lib/sockfd_cache.c
+++ b/lib/sockfd_cache.c
@@ -70,6 +70,8 @@ struct sockfd_cache_entry {
 	struct rb_node rb;
 	struct node_id nid;
 	struct sockfd_cache_fd *fds;
+
+	uatomic_bool need_to_destroy;
 };
 
 static int sockfd_cache_cmp(const struct sockfd_cache_entry *a,
@@ -159,32 +161,41 @@ static void free_cache_entry(struct sockfd_cache_entry *entry)
  * the victim node will finally find itself talking to a dead node and call
  * sockfd_cache_del() to delete this node from the cache.
  */
-static bool sockfd_cache_destroy(const struct node_id *nid)
+static bool sockfd_cache_destroy_nolock(const struct node_id *nid)
 {
 	struct sockfd_cache_entry *entry;
 
-	sd_write_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
 	if (!entry) {
 		sd_debug("It is already destroyed");
-		goto false_out;
+		return false;
 	}
 
 	if (!slots_all_free(entry)) {
 		sd_debug("Some victim still holds it");
-		goto false_out;
+		/* defer destroy */
+		uatomic_set_true(&entry->need_to_destroy);
+
+		return false;
 	}
 
 	rb_erase(&entry->rb, &sockfd_cache.root);
-	sd_rw_unlock(&sockfd_cache.lock);
 
 	destroy_all_slots(entry);
 	free_cache_entry(entry);
 
 	return true;
-false_out:
+}
+
+static bool sockfd_cache_destroy(const struct node_id *nid)
+{
+	bool result;
+
+	sd_write_lock(&sockfd_cache.lock);
+	result = sockfd_cache_destroy_nolock(nid);
 	sd_rw_unlock(&sockfd_cache.lock);
-	return false;
+
+	return result;
 }
 
 static void sockfd_cache_add_nolock(const struct node_id *nid)
@@ -371,10 +382,16 @@ static void sockfd_cache_put_long(const struct node_id *nid, int idx)
 
 	sd_debug("%s idx %d", addr_to_str(addr, port), idx);
 
-	sd_read_lock(&sockfd_cache.lock);
+	sd_write_lock(&sockfd_cache.lock);
 	entry = sockfd_cache_search(nid);
-	if (entry)
+	if (entry) {
 		uatomic_set_false(&entry->fds[idx].in_use);
+
+		if (uatomic_is_true(&entry->need_to_destroy)) {
+			sd_debug("deferred destroy of cache fd is required");
+			sockfd_cache_destroy_nolock(nid);
+		}
+	}
 	sd_rw_unlock(&sockfd_cache.lock);
 }
 
@@ -393,6 +410,11 @@ static void sockfd_cache_close(const struct node_id *nid, int idx)
 		close(entry->fds[idx].fd);
 		entry->fds[idx].fd = -1;
 		uatomic_set_false(&entry->fds[idx].in_use);
+
+		if (uatomic_is_true(&entry->need_to_destroy)) {
+			sd_debug("deferred destroy of cache fd is required");
+			sockfd_cache_destroy_nolock(nid);
+		}
 	}
 	sd_rw_unlock(&sockfd_cache.lock);
 }
-- 
1.7.10.4