[sheepdog] [PATCH 4/8] object cache: add object cache tree for every VDI

levin li levin108 at gmail.com
Mon Jul 9 08:29:18 CEST 2012


From: levin li <xingke.lwp at taobao.com>

Add an object cache tree to every VDI to keep track of all the
objects cached by that VDI; this is needed for the reclaiming work.

When sheep starts, we should also read the cached objects on
disk that were created by the previous run; otherwise, these
cached objects may cause a disk space leak.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/object_cache.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 213 insertions(+), 0 deletions(-)

diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index de45bf1..d4313ff 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -21,6 +21,8 @@
 #include <errno.h>
 #include <sys/file.h>
 #include <dirent.h>
+#include <urcu/uatomic.h>
+#include <urcu/rculist.h>
 
 #include "sheep_priv.h"
 #include "util.h"
@@ -38,6 +40,21 @@
 #define CACHE_VDI_BIT         (UINT32_C(1) << CACHE_VDI_SHIFT)
 #define CACHE_BLOCK_SIZE      ((UINT64_C(1) << 10) * 64) /* 64 KB */
 
+struct global_cache {
+	uint64_t cache_size;
+	int reclaiming;
+	struct cds_list_head cache_lru_list;
+};
+
+struct object_cache_entry {
+	uint32_t idx;
+	int refcnt;
+	int flags;
+	struct rb_node node;
+	struct object_cache *oc;
+	struct cds_list_head lru_list;
+};
+
 struct object_cache {
 	uint32_t vid;
 	struct hlist_node hash;
@@ -48,6 +65,8 @@ struct object_cache {
 	struct rb_root dirty_trees[2];
 	struct rb_root *active_dirty_tree;
 
+	struct rb_root object_tree;
+
 	pthread_mutex_t lock;
 };
 
@@ -57,9 +76,11 @@ struct dirty_cache_entry {
 			* block which should be flushed */
 	struct rb_node rb;
 	struct list_head list;
+	struct object_cache_entry *sys_entry;
 	int create;
 };
 
+static struct global_cache sys_cache;
 static char cache_dir[PATH_MAX];
 static int def_open_flags = O_RDWR;
 
@@ -105,6 +126,32 @@ static uint64_t calc_object_bmap(size_t len, off_t offset)
 	return bmap;
 }
 
+static struct object_cache_entry *
+object_cache_insert(struct rb_root *root, struct object_cache_entry *new)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct object_cache_entry *entry;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct object_cache_entry, node);
+
+		if (new->idx < entry->idx)
+			p = &(*p)->rb_left;
+		else if (new->idx > entry->idx)
+			p = &(*p)->rb_right;
+		else {
+			/* already has this entry */
+			return entry;
+		}
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, root);
+
+	return NULL; /* insert successfully */
+}
+
 static struct dirty_cache_entry *
 dirty_tree_insert(struct rb_root *root, struct dirty_cache_entry *new)
 {
@@ -152,6 +199,26 @@ static struct dirty_cache_entry *dirty_tree_search(struct rb_root *root,
 	return NULL;
 }
 
+static struct object_cache_entry *object_tree_search(struct rb_root *root,
+						     uint32_t idx)
+{
+	struct rb_node *n = root->rb_node;
+	struct object_cache_entry *t;
+
+	while (n) {
+		t = rb_entry(n, struct object_cache_entry, node);
+
+		if (idx < t->idx)
+			n = n->rb_left;
+		else if (idx > t->idx)
+			n = n->rb_right;
+		else
+			return t; /* found it */
+	}
+
+	return NULL;
+}
+
 static int create_dir_for(uint32_t vid)
 {
 	int ret = 0;
@@ -275,6 +342,36 @@ alloc_cache_entry(uint32_t idx, uint64_t bmap, int create)
 	return entry;
 }
 
+static struct object_cache_entry *
+add_to_object_cache(struct object_cache *oc, uint32_t idx)
+{
+	struct object_cache_entry *entry, *old;
+	uint32_t data_length;
+
+	if (idx_has_vdi_bit(idx))
+		data_length = SD_INODE_SIZE / 1024;
+	else
+		data_length = SD_DATA_OBJ_SIZE / 1024;
+
+	entry = zalloc(sizeof(*entry));
+	entry->oc = oc;
+	entry->idx = idx;
+	CDS_INIT_LIST_HEAD(&entry->lru_list);
+
+	pthread_mutex_lock(&oc->lock);
+	old = object_cache_insert(&oc->object_tree, entry);
+	if (!old) {
+		uatomic_add(&sys_cache.cache_size, data_length);
+		cds_list_add_rcu(&entry->lru_list, &sys_cache.cache_lru_list);
+	} else {
+		free(entry);
+		entry = old;
+	}
+	pthread_mutex_unlock(&oc->lock);
+
+	return entry;
+}
+
 static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
 			       int create)
 {
@@ -309,6 +406,8 @@ static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
 		else {
 			uint64_t bmap = UINT64_MAX;
 
+			add_to_object_cache(oc, idx);
+
 			entry = alloc_cache_entry(idx, bmap, 1);
 			pthread_mutex_lock(&oc->lock);
 			add_to_dirty_tree_and_list(oc, entry);
@@ -492,6 +591,7 @@ out:
 	return ret;
 }
 
+
 /* Fetch the object, cache it in success */
 static int object_cache_pull(struct object_cache *oc, uint32_t idx)
 {
@@ -524,6 +624,8 @@ static int object_cache_pull(struct object_cache *oc, uint32_t idx)
 	if (ret == SD_RES_SUCCESS) {
 		dprintf("oid %"PRIx64" pulled successfully\n", oid);
 		ret = create_cache_object(oc, idx, buf, data_length);
+		if (ret == SD_RES_SUCCESS)
+			add_to_object_cache(oc, idx);
 	}
 	free(buf);
 out:
@@ -888,6 +990,112 @@ out:
 	return;
 }
 
+static int check_cache_object_sanity(struct object_cache *cache, uint32_t idx)
+{
+	struct strbuf idx_buf;
+	unsigned data_length;
+	struct stat st;
+	int ret = SD_RES_SUCCESS;
+
+	strbuf_init(&idx_buf, PATH_MAX);
+	strbuf_addstr(&idx_buf, cache_dir);
+	strbuf_addf(&idx_buf, "/%06"PRIx32"/%08"PRIx32, cache->vid, idx);
+
+	if (stat(idx_buf.buf, &st) < 0) {
+		ret = unlink(idx_buf.buf);
+		eprintf("%m\n");
+		goto out;
+	}
+
+	if (idx_has_vdi_bit(idx))
+		data_length = SD_INODE_SIZE;
+	else
+		data_length = SD_DATA_OBJ_SIZE;
+
+	if (data_length != st.st_size) {
+		eprintf("inconsistent data length\n");
+		ret = unlink(idx_buf.buf);
+	}
+
+out:
+	strbuf_release(&idx_buf);
+	return ret;
+}
+
+static int load_existing_cache_object(struct object_cache *cache)
+{
+	DIR *dir;
+	struct dirent *d;
+	uint32_t idx;
+	struct strbuf idx_buf;
+	int ret = 0;
+
+	strbuf_init(&idx_buf, PATH_MAX);
+	strbuf_addstr(&idx_buf, cache_dir);
+	strbuf_addf(&idx_buf, "/%06"PRIx32, cache->vid);
+
+	dir = opendir(idx_buf.buf);
+	if (!dir) {
+		dprintf("%m\n");
+		ret = -1;
+		goto out;
+	}
+
+	while ((d = readdir(dir))) {
+		if (!strncmp(d->d_name, ".", 1))
+			continue;
+		idx = strtoul(d->d_name, NULL, 16);
+		if (idx == ULLONG_MAX)
+			continue;
+
+		if (check_cache_object_sanity(cache, idx) < 0)
+			continue;
+
+		add_to_object_cache(cache, idx);
+		dprintf("load cache %06" PRIx32 "/%08" PRIx32 "\n",
+			cache->vid, idx);
+	}
+
+out:
+	strbuf_release(&idx_buf);
+	return ret;
+}
+
+static int load_existing_cache(void)
+{
+	DIR *dir;
+	struct dirent *d;
+	uint32_t vid;
+	struct object_cache *cache;
+	struct strbuf vid_buf;
+	int ret = 0;
+
+	strbuf_init(&vid_buf, PATH_MAX);
+	strbuf_addstr(&vid_buf, cache_dir);
+
+	dir = opendir(vid_buf.buf);
+	if (!dir) {
+		dprintf("%m\n");
+		ret = -1;
+		goto out;
+	}
+
+	while ((d = readdir(dir))) {
+		if (!strncmp(d->d_name, ".", 1))
+			continue;
+		vid = strtoul(d->d_name, NULL, 16);
+		if (vid == ULLONG_MAX)
+			continue;
+
+		cache = find_object_cache(vid, 1);
+		load_existing_cache_object(cache);
+	}
+
+out:
+	strbuf_release(&vid_buf);
+	return ret;
+}
+
 int object_cache_init(const char *p)
 {
 	int ret = 0;
@@ -903,6 +1111,11 @@ int object_cache_init(const char *p)
 		}
 	}
 	strbuf_copyout(&buf, cache_dir, sizeof(cache_dir));
+
+	CDS_INIT_LIST_HEAD(&sys_cache.cache_lru_list);
+	uatomic_set(&sys_cache.cache_size, 0);
+
+	ret = load_existing_cache();
 err:
 	strbuf_release(&buf);
 	return ret;
-- 
1.7.1




More information about the sheepdog mailing list