From: levin li <xingke.lwp at taobao.com> When deleting data objects, we need to remove their OIDs from the objlist cache of every node. To do so, we record which objects were deleted and send the deletion list to all the nodes; each node then removes the listed OIDs from its cache. Signed-off-by: levin li <xingke.lwp at taobao.com> --- include/sheep.h | 1 + sheep/ops.c | 17 +++++++++++++++++ sheep/sheep_priv.h | 1 + sheep/store.c | 16 ++++++++++++++++ sheep/vdi.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 81 insertions(+), 4 deletions(-) diff --git a/include/sheep.h b/include/sheep.h index fc2ac58..b80bc01 100644 --- a/include/sheep.h +++ b/include/sheep.h @@ -45,6 +45,7 @@ #define SD_OP_CLEANUP 0x94 #define SD_OP_TRACE 0x95 #define SD_OP_TRACE_CAT 0x96 +#define SD_OP_NOTIFY_VDI_DEL 0x97 #define SD_FLAG_CMD_IO_LOCAL 0x0010 #define SD_FLAG_CMD_RECOVERY 0x0020 diff --git a/sheep/ops.c b/sheep/ops.c index 54e866c..b90c16b 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -452,6 +452,17 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp, return ret; } +static int cluster_notify_vdi_deletion(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + int count = req->data_length / sizeof(uint64_t); + uint64_t *oids = data; + + del_vdi_from_objlist_cache(oids, count); + + return SD_RES_SUCCESS; +} + static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -607,6 +618,12 @@ static struct sd_op_template sd_ops[] = { .process_main = cluster_cleanup, }, + [SD_OP_NOTIFY_VDI_DEL] = { + .type = SD_OP_TYPE_CLUSTER, + .force = 1, + .process_main = cluster_notify_vdi_deletion, + }, + /* local operations */ [SD_OP_GET_STORE_LIST] = { .type = SD_OP_TYPE_LOCAL, diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 1c27808..4330a62 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -292,6 +292,7 @@ void resume_recovery_work(void); int is_recoverying_oid(uint64_t oid); int
node_in_recovery(void); +int del_vdi_from_objlist_cache(uint64_t *oids, int count); int write_object(struct sd_vnode *e, int vnodes, int zones, uint32_t node_version, uint64_t oid, char *data, unsigned int datalen, diff --git a/sheep/store.c b/sheep/store.c index dac0bff..350c5af 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -133,6 +133,22 @@ static int check_and_insert_objlist_cache(uint64_t oid) return 0; } +int del_vdi_from_objlist_cache(uint64_t *oids, int count) +{ + int i; + dprintf("%d\n", count); + + for (i = 0; i < count; i++) { + dprintf("remove oid %" PRIx64 " from objlist cache\n", oids[i]); + pthread_rwlock_wrlock(&obj_list_cache.lock); + if (!objlist_cache_rb_remove(&obj_list_cache.root, oids[i])) + obj_list_cache.cache_size--; + pthread_rwlock_unlock(&obj_list_cache.lock); + } + + return 0; +} + static int obj_cmp(const void *oid1, const void *oid2) { const uint64_t hval1 = fnv_64a_buf((void *)oid1, sizeof(uint64_t), FNV1A_64_INIT); diff --git a/sheep/vdi.c b/sheep/vdi.c index 6277de8..a654e3d 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -10,6 +10,7 @@ */ #include <stdio.h> #include <stdlib.h> +#include <unistd.h> #include <sys/time.h> #include "sheepdog_proto.h" @@ -449,6 +450,39 @@ out: return ret; } +static int notify_deletion(uint64_t *oids, uint32_t count) +{ + int fd, ret; + unsigned int wlen, rlen = 0; + struct sd_vdi_req hdr; + char host[128]; + + addr_to_str(host, sizeof(host), sys->this_node.addr, 0); + + fd = connect_to(host, sys->this_node.port); + if (fd < 0) { + eprintf("connect to local node fail\n"); + return -1; + } + + memset(&hdr, 0, sizeof(hdr)); + + hdr.proto_ver = SD_PROTO_VER; + hdr.opcode = SD_OP_NOTIFY_VDI_DEL; + hdr.flags = SD_FLAG_CMD_WRITE; + hdr.data_length = sizeof(uint64_t) * count; + wlen = hdr.data_length; + + ret = exec_req(fd, (struct sd_req *)&hdr, oids, &wlen, &rlen); + close(fd); + + if (ret < 0) { + eprintf("send request fail\n"); + return -1; + } + + return 0; +} static void delete_one(struct work *work) { 
@@ -456,6 +490,8 @@ static void delete_one(struct work *work) uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1); int ret, i; struct sheepdog_inode *inode = NULL; + uint64_t deleted_oids[MAX_DATA_OBJS]; + uint32_t deleted_count = 0; eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id); @@ -475,6 +511,8 @@ static void delete_one(struct work *work) } for (i = 0; i < MAX_DATA_OBJS; i++) { + uint64_t oid; + if (!inode->data_vdi_id[i]) continue; @@ -484,21 +522,25 @@ static void delete_one(struct work *work) continue; } + oid = vid_to_data_oid(inode->data_vdi_id[i], i); + ret = remove_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, - vid_to_data_oid(inode->data_vdi_id[i], i), - inode->nr_copies); + oid, inode->nr_copies); if (ret != SD_RES_SUCCESS) dw->delete_error = 1; - else + else { + deleted_oids[deleted_count++] = oid; inode->data_vdi_id[i] = 0; + } } + notify_deletion(deleted_oids, deleted_count); + if (dw->delete_error) write_object(dw->entries, dw->nr_vnodes, dw->nr_zones, dw->epoch, vid_to_vdi_oid(vdi_id), (void *)inode, sizeof(*inode), 0, 0, inode->nr_copies, 0); - out: free(inode); } -- 1.7.1 |