[sheepdog] [PATCH v2 3/3] object list cache: reclaim object list cache when receiving a deletion event.
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu Jul 12 00:01:53 CEST 2012
At Wed, 11 Jul 2012 14:42:47 +0800,
levin li wrote:
>
> From: levin li <xingke.lwp at taobao.com>
>
> Before reclaiming the cache belonging to the VDI just deleted, we should test
> whether the VDI is exist, because after some node delete it and before the
> notification is sent to all the node, another node may issus a VDI creation
> event and reused the VDI id again, in which case we should reclaim the cached
> entry.
Should this be explained in the source code where vdi_exist() is
called?
>
> Signed-off-by: levin li <xingke.lwp at taobao.com>
> ---
> sheep/object_list_cache.c | 57 +++++++++++++++++++++++++++++++++++++++++++++
> sheep/ops.c | 14 +++++++++++
> sheep/sheep_priv.h | 2 +
> sheep/vdi.c | 27 +++++++++++++++++++++
> 4 files changed, 100 insertions(+), 0 deletions(-)
>
> diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
> index 39e8d49..479064c 100644
> --- a/sheep/object_list_cache.c
> +++ b/sheep/object_list_cache.c
> @@ -37,6 +37,11 @@ struct objlist_cache {
> pthread_rwlock_t lock;
> };
>
> +struct objlist_deletion_work {
> + uint32_t vid;
> + struct work work;
> +};
> +
> struct objlist_cache obj_list_cache = {
> .tree_version = 1,
> .root = RB_ROOT,
> @@ -167,3 +172,55 @@ out:
> pthread_rwlock_unlock(&obj_list_cache.lock);
> return SD_RES_SUCCESS;
> }
> +
> +static void objlist_deletion_work(struct work *work)
> +{
> + struct objlist_deletion_work *ow =
> + container_of(work, struct objlist_deletion_work, work);
> + struct objlist_cache_entry *entry, *t;
> + uint32_t vid = ow->vid, entry_vid;
> +
> + if (vdi_exist(vid)) {
> + eprintf("VDI (%" PRIx32 ") is still in use, can not be deleted\n",
> + vid);
> + return;
> + }
> +
> + pthread_rwlock_wrlock(&obj_list_cache.lock);
> + list_for_each_entry_safe(entry, t, &obj_list_cache.entry_list, list) {
> + entry_vid = oid_to_vid(entry->oid);
> + if (entry_vid != vid)
> + continue;
> + dprintf("delete object entry %" PRIx64 "\n", entry->oid);
> + list_del(&entry->list);
> + rb_erase(&entry->node, &obj_list_cache.root);
> + free(entry);
> + }
> + pthread_rwlock_unlock(&obj_list_cache.lock);
> +}
> +
> +static void objlist_deletion_done(struct work *work)
> +{
> + struct objlist_deletion_work *ow =
> + container_of(work, struct objlist_deletion_work, work);
> + free(ow);
> +}
> +
> +/* During recovery, some objects may be migrated from one node to a
> + * new one, but we can't remove the object list cache entry in this
> + * case, it may causes recovery failure, so after recovery, we can
> + * not locate the cache entry correctly, causing objlist_cache_remove()
> + * fail to delete it, then we need this function to do the cleanup work
> + * in all nodes. */
I'm still a bit confused why we need both objlist_cache_delete and
objlist_cache_remove. If no recovery happens during object deletion,
we don't need objlist_cache_delete because objlist_cache_remove
removes the entry. objlist_cache_delete is necessary for the case
object deletion and recovery happen at the same time to clean up
unhandled entries by objlist_cache_remove. Is it correct?
Thanks,
Kazutaka
> +int objlist_cache_delete(uint32_t vid)
> +{
> + struct objlist_deletion_work *ow;
> +
> + ow = zalloc(sizeof(*ow));
> + ow->vid = vid;
> + ow->work.fn = objlist_deletion_work;
> + ow->work.done = objlist_deletion_done;
> + queue_work(sys->deletion_wqueue, &ow->work);
> +
> + return SD_RES_SUCCESS;
> +}
> diff --git a/sheep/ops.c b/sheep/ops.c
> index ecf4f2e..bd198a1 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -485,6 +485,14 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
> return ret;
> }
>
> +static int cluster_notify_vdi_del(const struct sd_req *req, struct sd_rsp *rsp,
> + void *data)
> +{
> + uint32_t vid = *(uint32_t *)data;
> +
> + return objlist_cache_delete(vid);
> +}
> +
> static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> @@ -822,6 +830,12 @@ static struct sd_op_template sd_ops[] = {
> .process_main = cluster_cleanup,
> },
>
> + [SD_OP_NOTIFY_VDI_DEL] = {
> + .type = SD_OP_TYPE_CLUSTER,
> + .force = 1,
> + .process_main = cluster_notify_vdi_del,
> + },
> +
> /* local operations */
> [SD_OP_GET_STORE_LIST] = {
> .type = SD_OP_TYPE_LOCAL,
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index afbc361..a4273e4 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -195,6 +195,7 @@ int create_listen_port(int port, void *data);
> int init_store(const char *dir, int enable_write_cache);
> int init_base_path(const char *dir);
>
> +int vdi_exist(uint32_t vid);
> int add_vdi(char *data, int data_len, uint64_t size, uint32_t *new_vid,
> uint32_t base_vid, int is_snapshot, unsigned int *nr_copies);
>
> @@ -257,6 +258,7 @@ uint32_t get_latest_epoch(void);
> int set_cluster_ctime(uint64_t ctime);
> uint64_t get_cluster_ctime(void);
> int get_obj_list(const struct sd_list_req *, struct sd_list_rsp *, void *);
> +int objlist_cache_delete(uint32_t vid);
>
> int start_recovery(struct vnode_info *cur_vnodes,
> struct vnode_info *old_vnodes);
> diff --git a/sheep/vdi.c b/sheep/vdi.c
> index bcb3df1..c9e070e 100644
> --- a/sheep/vdi.c
> +++ b/sheep/vdi.c
> @@ -15,6 +15,33 @@
> #include "sheepdog_proto.h"
> #include "sheep_priv.h"
>
> +int vdi_exist(uint32_t vid)
> +{
> + struct sheepdog_inode *inode;
> + int ret = 1;
> +
> + inode = zalloc(sizeof(*inode));
> + if (!inode) {
> + ret = 0;
> + goto out;
> + }
> +
> + ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
> + sizeof(*inode), 0);
> + if (ret != SD_RES_SUCCESS) {
> + eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid);
> + ret = 0;
> + goto out;
> + }
> +
> + if (*inode->name == '\0')
> + ret = 0;
> + ret = 1;
> +
> +out:
> + free(inode);
> + return ret;
> +}
>
> /* TODO: should be performed atomically */
> static int create_vdi_obj(char *name, uint32_t new_vid, uint64_t size,
> --
> 1.7.1
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list