[Sheepdog] [PATCH v2 2/2] remove oids from object list cache when deleting a vdi

Li Wenpeng levin108 at gmail.com
Sat Apr 28 08:52:32 CEST 2012


From: levin li <xingke.lwp at taobao.com>

Cluster recovery may move objects from one node to another,
but the object list cache is not updated accordingly. When
deleting an object we therefore cannot tell which node's
cache still holds its id, so we need to send the deletion
list to all the nodes so that each of them removes the
specified object ids from its object list cache.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 include/sheep.h    |    1 +
 sheep/ops.c        |   17 +++++++++++++++++
 sheep/sheep_priv.h |    1 +
 sheep/store.c      |   17 +++++++++++++++++
 sheep/vdi.c        |   50 +++++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/include/sheep.h b/include/sheep.h
index 7e287c4..e941dc1 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -46,6 +46,7 @@
 #define SD_OP_TRACE          0x95
 #define SD_OP_TRACE_CAT      0x96
 #define SD_OP_STAT_RECOVERY  0x97
+#define SD_OP_NOTIFY_VDI_DEL 0x98
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
diff --git a/sheep/ops.c b/sheep/ops.c
index d5ba7fa..8cd6c47 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -461,6 +461,17 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
+static int cluster_notify_vdi_deletion(const struct sd_req *req, struct sd_rsp *rsp,
+				void *data)
+{
+	int count = req->data_length / sizeof(uint64_t);
+	uint64_t *oids = data;
+
+	del_vdi_from_objlist_cache(oids, count);
+
+	return SD_RES_SUCCESS;
+}
+
 static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
@@ -616,6 +627,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = cluster_cleanup,
 	},
 
+	[SD_OP_NOTIFY_VDI_DEL] = {
+		.type = SD_OP_TYPE_CLUSTER,
+		.force = 1,
+		.process_main = cluster_notify_vdi_deletion,
+	},
+
 	/* local operations */
 	[SD_OP_GET_STORE_LIST] = {
 		.type = SD_OP_TYPE_LOCAL,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index afd5c1b..848199e 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -295,6 +295,7 @@ void resume_recovery_work(void);
 int is_recoverying_oid(uint64_t oid);
 int node_in_recovery(void);
 
+int del_vdi_from_objlist_cache(uint64_t *oids, int count);
 int write_object(struct vnode_info *vnodes, uint32_t node_version,
 		 uint64_t oid, char *data, unsigned int datalen,
 		 uint64_t offset, uint16_t flags, int nr, int create);
diff --git a/sheep/store.c b/sheep/store.c
index 2ce8d50..6ccefa3 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -131,6 +131,23 @@ static int check_and_insert_objlist_cache(uint64_t oid)
 	return 0;
 }
 
+int del_vdi_from_objlist_cache(uint64_t *oids, int count)
+{
+	int i;
+	dprintf("%d\n", count);
+
+	for (i = 0; i < count; i++) {
+		pthread_rwlock_wrlock(&obj_list_cache.lock);
+		if (!objlist_cache_rb_remove(&obj_list_cache.root, oids[i])) {
+			dprintf("remove oid %" PRIx64 " from objlist cache\n", oids[i]);
+			obj_list_cache.cache_size--;
+		}
+		pthread_rwlock_unlock(&obj_list_cache.lock);
+	}
+
+	return 0;
+}
+
 static void get_store_dir(struct strbuf *buf, int epoch)
 {
 	if (!strcmp(sd_store->name, "simple"))
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 0f17824..af2953e 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -10,6 +10,7 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <sys/time.h>
 
 #include "sheepdog_proto.h"
@@ -439,6 +440,39 @@ out:
 	return ret;
 }
 
+static int notify_deletion(uint64_t *oids, uint32_t count)
+{
+	int fd, ret;
+	unsigned int wlen, rlen = 0;
+	struct sd_vdi_req hdr;
+	char host[128];
+
+	addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
+
+	fd = connect_to(host, sys->this_node.port);
+	if (fd < 0) {
+		eprintf("connect to local node fail\n");
+		return -1;
+	}
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.proto_ver = SD_PROTO_VER;
+	hdr.opcode = SD_OP_NOTIFY_VDI_DEL;
+	hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_WORKER;
+	hdr.data_length = sizeof(uint64_t) * count;
+	wlen = hdr.data_length;
+
+	ret = exec_req(fd, (struct sd_req *)&hdr, oids, &wlen, &rlen);
+	close(fd);
+
+	if (ret < 0) {
+		eprintf("send request fail\n");
+		return -1;
+	}
+
+	return 0;
+}
 
 static void delete_one(struct work *work)
 {
@@ -446,6 +480,8 @@ static void delete_one(struct work *work)
 	uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
 	int ret, i;
 	struct sheepdog_inode *inode = NULL;
+	uint64_t deleted_oids[MAX_DATA_OBJS];
+	uint32_t deleted_count = 0;
 	int nr_copies;
 
 	eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
@@ -468,6 +504,8 @@ static void delete_one(struct work *work)
 	}
 
 	for (i = 0; i < MAX_DATA_OBJS; i++) {
+		uint64_t oid;
+
 		if (!inode->data_vdi_id[i])
 			continue;
 
@@ -477,16 +515,22 @@ static void delete_one(struct work *work)
 			continue;
 		}
 
+		oid = vid_to_data_oid(inode->data_vdi_id[i], i);
+
 		ret = remove_object(dw->vnodes, dw->epoch,
-			      vid_to_data_oid(inode->data_vdi_id[i], i),
-			      nr_copies);
+				  oid, inode->nr_copies);
 
 		if (ret != SD_RES_SUCCESS)
 			dw->delete_error = 1;
-		else
+		else {
+			deleted_oids[deleted_count++] = oid;
 			inode->data_vdi_id[i] = 0;
+		}
 	}
 
+	if (deleted_count > 0)
+		notify_deletion(deleted_oids, deleted_count);
+
 	if (dw->delete_error) {
 		write_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(vdi_id),
 			     (void *)inode, sizeof(*inode), 0, 0, nr_copies, 0);
-- 
1.7.10




More information about the sheepdog mailing list