[sheepdog] [PATCH 3/3] object list cache: reclaim object list cache when receiving a deletion event.

levin li levin108 at gmail.com
Wed Jul 11 04:20:10 CEST 2012


From: levin li <xingke.lwp at taobao.com>

Before reclaiming the cache belonging to the VDI just deleted, we should test
whether the VDI still exists, because after one node deletes it and before the
notification is sent to all the nodes, another node may issue a VDI creation
event and reuse the VDI id, in which case we should not reclaim the cached
entries.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/object_list_cache.c |   51 +++++++++++++++++++++++++++++++++++++++++++++
 sheep/ops.c               |   14 ++++++++++++
 sheep/sheep_priv.h        |    2 +
 sheep/vdi.c               |   27 +++++++++++++++++++++++
 4 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
index 39e8d49..ce48b4c 100644
--- a/sheep/object_list_cache.c
+++ b/sheep/object_list_cache.c
@@ -37,6 +37,11 @@ struct objlist_cache {
 	pthread_rwlock_t lock;
 };
 
+struct objlist_reclaim_work {
+	uint32_t vid;
+	struct work work;
+};
+
 struct objlist_cache obj_list_cache = {
 	.tree_version	= 1,
 	.root		= RB_ROOT,
@@ -167,3 +172,51 @@ out:
 	pthread_rwlock_unlock(&obj_list_cache.lock);
 	return SD_RES_SUCCESS;
 }
+
+/* worker: drop every cached object entry belonging to 'vid', unless the
+ * id has already been reused by a newly created VDI */
+static void objlist_reclaim_work(struct work *work)
+{
+	struct objlist_reclaim_work *ow =
+		container_of(work, struct objlist_reclaim_work, work);
+	struct objlist_cache_entry *entry, *t;
+	uint32_t vid = ow->vid, entry_vid;
+
+	if (vdi_exist(vid)) {
+		eprintf("VDI (%" PRIx32 ") still exists, not reclaiming\n",
+			vid);
+		return;
+	}
+
+	pthread_rwlock_wrlock(&obj_list_cache.lock);
+	list_for_each_entry_safe(entry, t, &obj_list_cache.entry_list, list) {
+		entry_vid = oid_to_vid(entry->oid);
+		if (entry_vid != vid)
+			continue;
+		dprintf("reclaim object entry %" PRIx64 "\n", entry->oid);
+		list_del(&entry->list);
+		rb_erase(&entry->node, &obj_list_cache.root);
+		free(entry);
+	}
+	pthread_rwlock_unlock(&obj_list_cache.lock);
+}
+
+static void objlist_reclaim_done(struct work *work)
+{
+	struct objlist_reclaim_work *ow =
+		container_of(work, struct objlist_reclaim_work, work);
+	free(ow);
+}
+
+/* queue an asynchronous reclaim of the object list cache for 'vid' */
+int objlist_cache_reclaim(uint32_t vid)
+{
+	struct objlist_reclaim_work *ow;
+
+	ow = zalloc(sizeof(*ow));
+	if (!ow)
+		return SD_RES_NO_MEM;
+	ow->vid = vid;
+	ow->work.fn = objlist_reclaim_work;
+	ow->work.done = objlist_reclaim_done;
+	queue_work(sys->deletion_wqueue, &ow->work);
+
+	return SD_RES_SUCCESS;
+}
diff --git a/sheep/ops.c b/sheep/ops.c
index ecf4f2e..d8d53a2 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -485,6 +485,14 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
+static int cluster_notify_vdi_del(const struct sd_req *req, struct sd_rsp *rsp,
+				  void *data)
+{
+	uint32_t vid = *(uint32_t *)data;
+
+	return objlist_cache_reclaim(vid);
+}
+
 static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
@@ -822,6 +830,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = cluster_cleanup,
 	},
 
+	[SD_OP_NOTIFY_VDI_DEL] = {
+		.type = SD_OP_TYPE_CLUSTER,
+		.force = 1,
+		.process_main = cluster_notify_vdi_del,
+	},
+
 	/* local operations */
 	[SD_OP_GET_STORE_LIST] = {
 		.type = SD_OP_TYPE_LOCAL,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index afbc361..a2df7b7 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -195,6 +195,7 @@ int create_listen_port(int port, void *data);
 int init_store(const char *dir, int enable_write_cache);
 int init_base_path(const char *dir);
 
+int vdi_exist(uint32_t vid);
 int add_vdi(char *data, int data_len, uint64_t size, uint32_t *new_vid,
 	    uint32_t base_vid, int is_snapshot, unsigned int *nr_copies);
 
@@ -257,6 +258,7 @@ uint32_t get_latest_epoch(void);
 int set_cluster_ctime(uint64_t ctime);
 uint64_t get_cluster_ctime(void);
 int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *);
+int objlist_cache_reclaim(uint32_t vid);
 
 int start_recovery(struct vnode_info *cur_vnodes,
 	struct vnode_info *old_vnodes);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index bcb3df1..c9e070e 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -15,6 +15,33 @@
 #include "sheepdog_proto.h"
 #include "sheep_priv.h"
 
+/* return 1 if the inode object for 'vid' holds a live VDI, 0 otherwise */
+int vdi_exist(uint32_t vid)
+{
+	struct sheepdog_inode *inode;
+	int ret = 1;
+
+	inode = zalloc(sizeof(*inode));
+	if (!inode) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
+			  sizeof(*inode), 0);
+	if (ret != SD_RES_SUCCESS) {
+		eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid);
+		ret = 0;
+		goto out;
+	}
+
+	ret = 1;		/* inode object is readable */
+	if (*inode->name == '\0')
+		ret = 0;	/* name cleared: the VDI was deleted */
+out:
+	free(inode);
+	return ret;
+}
 
 /* TODO: should be performed atomically */
 static int create_vdi_obj(char *name, uint32_t new_vid, uint64_t size,
-- 
1.7.1




More information about the sheepdog mailing list