[Sheepdog] [PATCH v4 6/6] remove oids from object list cache when deleting a vdi
levin li
levin108 at gmail.com
Thu May 3 12:25:49 CEST 2012
Cluster recovery may cause objects to be migrated from one node
to another, but the object list cache is not updated. When
deleting an object we cannot tell which node's cache still
holds its id, so we need to send the deletion list
to all nodes so that each of them removes the specified object ids
from its object list cache.
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
include/sheep.h | 1 +
sheep/object_list_cache.c | 17 ++++++++++++++
sheep/ops.c | 17 ++++++++++++++
sheep/sheep_priv.h | 1 +
sheep/vdi.c | 57 ++++++++++++++++++++++++++++++++++++++++++---
5 files changed, 90 insertions(+), 3 deletions(-)
diff --git a/include/sheep.h b/include/sheep.h
index f72460d..b4692d0 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -46,6 +46,7 @@
#define SD_OP_TRACE 0x95
#define SD_OP_TRACE_CAT 0x96
#define SD_OP_STAT_RECOVERY 0x97
+#define SD_OP_NOTIFY_VDI_DEL 0x98
#define SD_FLAG_CMD_IO_LOCAL 0x0010
#define SD_FLAG_CMD_RECOVERY 0x0020
diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
index 28cdbbc..339c865 100644
--- a/sheep/object_list_cache.c
+++ b/sheep/object_list_cache.c
@@ -130,6 +130,23 @@ int check_and_insert_objlist_cache(uint64_t oid)
return 0;
}
+int del_vdi_from_objlist_cache(uint64_t *oids, int count) /* Remove each of the count ids in oids[] from the object list cache; always returns 0. */
+{
+ int i;
+
+ dprintf("%d\n", count); /* log how many ids were requested */
+ for (i = 0; i < count; i++) {
+ pthread_rwlock_wrlock(&obj_list_cache.lock); /* write lock is taken and released once per id, not once per call */
+ if (!objlist_cache_rb_remove(&obj_list_cache.root, oids[i])) { /* a zero return is handled as "entry was removed" */
+ dprintf("remove oid %" PRIx64 " from objlist cache\n", oids[i]);
+ obj_list_cache.cache_size--; /* size counter is decremented only when a removal happened */
+ }
+ pthread_rwlock_unlock(&obj_list_cache.lock);
+ }
+
+ return 0; /* ids that were not found are skipped silently */
+}
+
int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *data)
{
uint64_t *list = (uint64_t *)data;
diff --git a/sheep/ops.c b/sheep/ops.c
index 439b714..0fabdab 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -550,6 +550,17 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
return ret;
}
+static int cluster_notify_vdi_deletion(const struct sd_req *req, struct sd_rsp *rsp,
+ void *data) /* Handler registered for SD_OP_NOTIFY_VDI_DEL: treats data as an array of 64-bit object ids; rsp is not used. */
+{
+ int count = req->data_length / sizeof(uint64_t); /* number of whole ids in the payload; integer division drops any trailing partial bytes */
+ uint64_t *oids = data;
+
+ del_vdi_from_objlist_cache(oids, count); /* return value is ignored */
+
+ return SD_RES_SUCCESS; /* unconditionally reports success */
+}
+
static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -945,6 +956,12 @@ static struct sd_op_template sd_ops[] = {
.process_bottom = cluster_cleanup,
},
+ [SD_OP_NOTIFY_VDI_DEL] = {
+ .type = SD_OP_TYPE_CLUSTER,
+ .force = 1,
+ .process_bottom = cluster_notify_vdi_deletion,
+ },
+
/* local operations */
[SD_OP_GET_STORE_LIST] = {
.type = SD_OP_TYPE_LOCAL,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 8258738..66722c5 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -293,6 +293,7 @@ void resume_recovery_work(void);
int is_recoverying_oid(uint64_t oid);
int node_in_recovery(void);
+int del_vdi_from_objlist_cache(uint64_t *oids, int count);
int write_object(struct vnode_info *vnodes, uint32_t node_version,
uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, uint16_t flags, int nr, int create);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index f240303..3fd5397 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -10,6 +10,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#include <sys/time.h>
#include "sheepdog_proto.h"
@@ -430,6 +431,39 @@ out:
return ret;
}
+static int notify_deletion(uint64_t *oids, uint32_t count) /* Send count object ids to this node's own address as an SD_OP_NOTIFY_VDI_DEL request; returns 0 on success, -1 on failure. */
+{
+ int fd, ret;
+ unsigned int wlen, rlen = 0; /* rlen starts at 0 -- presumably no reply payload is expected; confirm against exec_req */
+ struct sd_vdi_req hdr;
+ char host[128];
+
+ addr_to_str(host, sizeof(host), sys->this_node.addr, 0); /* format this node's own address as a host string */
+
+ fd = connect_to(host, sys->this_node.port);
+ if (fd < 0) {
+ eprintf("connect to local node fail\n");
+ return -1;
+ }
+
+ memset(&hdr, 0, sizeof(hdr)); /* every header field not set below stays zero */
+
+ hdr.proto_ver = SD_PROTO_VER;
+ hdr.opcode = SD_OP_NOTIFY_VDI_DEL; /* NOTE(review): the op is registered as SD_OP_TYPE_CLUSTER, presumably so it reaches every node -- confirm */
+ hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_WORKER;
+ hdr.data_length = sizeof(uint64_t) * count; /* payload is the raw id array */
+ wlen = hdr.data_length;
+
+ ret = exec_req(fd, (struct sd_req *)&hdr, oids, &wlen, &rlen); /* hdr is a struct sd_vdi_req passed through a cast to struct sd_req */
+ close(fd); /* the connection is closed whether or not the request succeeded */
+
+ if (ret < 0) { /* only a negative exec_req return is treated as failure */
+ eprintf("send request fail\n");
+ return -1;
+ }
+
+ return 0;
+}
static void delete_one(struct work *work)
{
@@ -437,6 +471,8 @@ static void delete_one(struct work *work)
uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
int ret, i;
struct sheepdog_inode *inode = NULL;
+ uint64_t *deleted_oids = NULL;
+ uint32_t deleted_count = 0;
int nr_copies;
eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
@@ -447,6 +483,12 @@ static void delete_one(struct work *work)
goto out;
}
+ deleted_oids = malloc(sizeof(uint64_t) * MAX_DATA_OBJS);
+ if (!deleted_oids) {
+ eprintf("failed to allocate memory\n");
+ goto out;
+ }
+
nr_copies = get_nr_copies(dw->vnodes);
ret = read_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(vdi_id),
@@ -461,6 +503,8 @@ static void delete_one(struct work *work)
dw->delete_error = 0;
for (i = 0; i < MAX_DATA_OBJS; i++) {
+ uint64_t oid;
+
if (!inode->data_vdi_id[i])
continue;
@@ -470,16 +514,22 @@ static void delete_one(struct work *work)
continue;
}
+ oid = vid_to_data_oid(inode->data_vdi_id[i], i);
+
ret = remove_object(dw->vnodes, dw->epoch,
- vid_to_data_oid(inode->data_vdi_id[i], i),
- nr_copies);
+ oid, inode->nr_copies);
if (ret != SD_RES_SUCCESS)
dw->delete_error = 1;
- else
+ else {
+ deleted_oids[deleted_count++] = oid;
inode->data_vdi_id[i] = 0;
+ }
}
+ if (deleted_count > 0)
+ notify_deletion(deleted_oids, deleted_count);
+
if (dw->delete_error) {
write_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(vdi_id),
(void *)inode, sizeof(*inode), 0, 0, nr_copies, 0);
@@ -487,6 +537,7 @@ static void delete_one(struct work *work)
out:
free(inode);
+ free(deleted_oids);
}
static void delete_one_done(struct work *work)
--
1.7.10
More information about the sheepdog
mailing list