[sheepdog] [PATCH RFC 2/5] sockfd: implement shrinking mechanism for handling EMFILE

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Fri Jul 12 03:54:23 CEST 2013


From: Hitoshi Mitake <mitake.hitoshi at gmail.com>

sockfd is a big fd consumer and cached fds should be closed when sheep
faces EMFILE. This patch adds a new function, shrink_sockfd(), for
closing unused cached fds.

The current mechanism for choosing a victim fd is too simple: it picks
nodes in round-robin order. We should employ a better algorithm, such
as LRU, in the future.

This patch also adds a function retry_open(), a wrapper of open(2)
which calls shrink_sockfd() and retries open() when it fails with EMFILE.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 sheep/sheep_priv.h   |    2 ++
 sheep/sockfd_cache.c |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 546f152..7371414 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -414,6 +414,8 @@ void sheep_put_sockfd(const struct node_id *, struct sockfd *);
 void sheep_del_sockfd(const struct node_id *, struct sockfd *);
 int sheep_exec_req(const struct node_id *nid, struct sd_req *hdr, void *data);
 bool sheep_need_retry(uint32_t epoch);
+int retry_open(const char *pathname, int flags);
+bool shrink_sockfd(void);
 
 /* journal_file.c */
 int journal_file_init(const char *path, size_t size, bool skip);
diff --git a/sheep/sockfd_cache.c b/sheep/sockfd_cache.c
index 55c337c..37a6a5e 100644
--- a/sheep/sockfd_cache.c
+++ b/sheep/sockfd_cache.c
@@ -35,6 +35,12 @@ struct sockfd_cache {
 	int count;
 };
 
+/*
+ * shrink_head: node where shrink_sockfd() starts its next scan, for fairness
+ * (NULL means start from the first node); protected by sockfd_cache.lock
+ */
+static struct rb_node *shrink_head;
+
 static struct sockfd_cache sockfd_cache = {
 	.root = RB_ROOT,
 	.lock = PTHREAD_RWLOCK_INITIALIZER,
@@ -196,6 +202,9 @@ static bool sockfd_cache_destroy(const struct node_id *nid)
 		goto false_out;
 	}
 
+	if (&entry->rb == shrink_head)
+		shrink_head = rb_next(&entry->rb);
+
 	rb_erase(&entry->rb, &sockfd_cache.root);
 	pthread_rwlock_unlock(&sockfd_cache.lock);
 
@@ -546,3 +555,78 @@ bool sheep_need_retry(uint32_t epoch)
 {
 	return sys_epoch() == epoch;
 }
+
+bool shrink_sockfd(void)
+{
+	bool ret = false;
+
+	pthread_rwlock_wrlock(&sockfd_cache.lock);
+
+	struct rb_node *p = shrink_head ?
+		shrink_head : rb_first(&sockfd_cache.root);
+
+	if (!p) {
+		sd_dprintf("There's no sockfd");
+		goto out;
+	}
+
+	struct rb_node *first = p;
+	/* scan every node at most once, wrapping around at the tree's end */
+	do {
+		struct sockfd_cache_entry *entry =
+			rb_entry(p, struct sockfd_cache_entry, rb);
+
+		for (int i = 0; i < fds_count; i++) {
+			if (!uatomic_set_true(&entry->fds[i].in_use))
+				/* could not grab: slot is in use elsewhere */
+				continue;
+
+			if (entry->fds[i].fd == -1) {
+				/* no fd cached in this slot, release it */
+				uatomic_set_false(&entry->fds[i].in_use);
+				continue;
+			}
+
+			sd_dprintf("victim node: %s, fd: %d",
+				nid_to_str(&entry->nid), entry->fds[i].fd);
+			close(entry->fds[i].fd);
+			entry->fds[i].fd = -1;
+			uatomic_set_false(&entry->fds[i].in_use);
+			/* let the next call start from the following node */
+			shrink_head = rb_next(p);
+
+			ret = true;
+			goto out;
+		}
+
+		p = rb_next(p);
+		if (!p)
+			p = rb_first(&sockfd_cache.root);
+	} while (first != p);
+
+	sd_dprintf("shrinking couldn't be done");
+
+out:
+	pthread_rwlock_unlock(&sockfd_cache.lock);
+	return ret;
+}
+
+int retry_open(const char *pathname, int flags)
+{
+	int retry_count = 2;	/* extra open() attempts allowed on EMFILE */
+
+retry:;
+	int fd = open(pathname, flags, sd_def_fmode);
+	if (0 <= fd)
+		return fd;
+
+	if (errno == EMFILE && retry_count--) {
+		if (!shrink_sockfd())
+			/* nothing shrunk: wait for other threads' close() */
+			usleep(100);
+
+		goto retry;
+	}
+
+	return fd;
+}
-- 
1.7.10.4




More information about the sheepdog mailing list