[sheepdog] [PATCH 1/2] sheep: update ledger objects in an asynchronous manner
Hitoshi Mitake
mitake.hitoshi at gmail.com
Mon Jun 1 16:36:15 CEST 2015
At Sat, 30 May 2015 23:00:27 +0900,
Hitoshi Mitake wrote:
>
> This patch let gateway update ledger objects in an asynchronous
> manner. This change improves performance. Especially vdi deletion will
> be improved.
>
> Of course this change introduces a chance of inconsistency of
> metadata, but but sheepdog is a distributed file system without
> metadata journaling. Therefore inconsistency of metadata introduced by
> machine crash or network error, etc must be solved by methods like
> fsck (dog vdi check, etc). For this purpose, the next patch enhances
> VID GC for collecting garbage objects which belong to removed VDIs.
>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
> sheep/gateway.c | 106 ++++++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 84 insertions(+), 22 deletions(-)
Applied this series.
Thanks,
Hitoshi
>
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index 3cb21f0..af7fce4 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -584,16 +584,11 @@ static int prepare_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> * This function decreases a refcnt of vid_to_data_oid(old_vid, idx) and
> * increases one of vid_to_data_oid(new_vid, idx)
> */
> -static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> - uint32_t *new_vids,
> +static void update_obj_refcnt(uint64_t offset, int start,
> + size_t nr_vids, uint32_t *vids, uint32_t *new_vids,
> struct generation_reference *refs)
> {
> - int i, start, ret = SD_RES_SUCCESS;
> - size_t nr_vids = hdr->data_length / sizeof(*vids);
> - uint64_t offset;
> -
> - offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> - start = offset / sizeof(*vids);
> + int i, ret = SD_RES_SUCCESS;
>
> for (i = 0; i < nr_vids; i++) {
> if (vids[i] == 0 || vids[i] == new_vids[i])
> @@ -603,16 +598,7 @@ static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> refs[i].generation, refs[i].count);
> if (ret != SD_RES_SUCCESS)
> sd_err("fail, %d", ret);
> -
> - refs[i].generation = 0;
> - refs[i].count = 0;
> }
> -
> - return sd_write_object(hdr->obj.oid, (char *)refs,
> - nr_vids * sizeof(*refs),
> - offsetof(struct sd_inode, gref)
> - + start * sizeof(*refs),
> - false);
> }
>
> static bool is_inode_refresh_req(struct request *req)
> @@ -663,13 +649,51 @@ int gateway_read_obj(struct request *req)
> return ret;
> }
>
> +struct update_obj_refcnt_work {
> + struct work work;
> +
> + uint64_t offset;
> + int start;
> +
> + size_t nr_vids;
> + uint32_t *vids, *new_vids;
> +
> + struct generation_reference *refs;
> +};
> +
> +static void async_update_obj_refcnt_work(struct work *work)
> +{
> + struct update_obj_refcnt_work *w =
> + container_of(work, struct update_obj_refcnt_work, work);
> +
> + sd_debug("async update of object reference count start: %p", w);
> + update_obj_refcnt(w->offset, w->start, w->nr_vids, w->vids,
> + w->new_vids, w->refs);
> +}
> +
> +static void async_update_obj_refcnt_done(struct work *work)
> +{
> + struct update_obj_refcnt_work *w =
> + container_of(work, struct update_obj_refcnt_work, work);
> +
> + sd_debug("async update of object reference count done: %p", w);
> +
> + free(w->vids);
> + free(w->new_vids);
> + free(w->refs);
> +
> + free(w);
> +}
> +
> int gateway_write_obj(struct request *req)
> {
> uint64_t oid = req->rq.obj.oid;
> int ret;
> struct sd_req *hdr = &req->rq;
> uint32_t *vids = NULL, *new_vids = req->data;
> - struct generation_reference *refs = NULL;
> + struct generation_reference *refs = NULL, *zeroed_refs = NULL;
> + struct update_obj_refcnt_work *refcnt_work;
> + size_t nr_vids;
>
> if ((req->rq.flags & SD_FLAG_CMD_TGT) &&
> is_refresh_required(oid_to_vid(oid))) {
> @@ -685,13 +709,14 @@ int gateway_write_obj(struct request *req)
>
>
> if (is_data_vid_update(hdr)) {
> - size_t nr_vids = hdr->data_length / sizeof(*vids);
> + nr_vids = hdr->data_length / sizeof(*vids);
>
> invalidate_other_nodes(oid_to_vid(oid));
>
> /* read the previous vids to discard their references later */
> vids = xzalloc(sizeof(*vids) * nr_vids);
> refs = xzalloc(sizeof(*refs) * nr_vids);
> + zeroed_refs = xcalloc(sizeof(*zeroed_refs), nr_vids);
> ret = prepare_obj_refcnt(hdr, vids, refs);
> if (ret != SD_RES_SUCCESS)
> goto out;
> @@ -702,13 +727,50 @@ int gateway_write_obj(struct request *req)
> goto out;
>
> if (is_data_vid_update(hdr)) {
> + uint64_t offset;
> + int start;
> +
> + offset = hdr->obj.offset
> + - offsetof(struct sd_inode, data_vdi_id);
> + start = offset / sizeof(*vids);
> +
> sd_debug("update reference counts, %" PRIx64, hdr->obj.oid);
> - update_obj_refcnt(hdr, vids, new_vids, refs);
> +
> + ret = sd_write_object(hdr->obj.oid, (char *)zeroed_refs,
> + nr_vids * sizeof(*zeroed_refs),
> + offsetof(struct sd_inode, gref)
> + + start * sizeof(*zeroed_refs), false);
> + if (ret != SD_RES_SUCCESS) {
> + sd_err("updating reference count of inode object %"
> + PRIx64 " failed: %s", hdr->obj.oid,
> + sd_strerror(ret));
> +
> + goto out;
> + }
> +
> + sd_debug("update ledger objects of %"PRIx64, hdr->obj.oid);
> + refcnt_work = xzalloc(sizeof(*refcnt_work));
> +
> + refcnt_work->vids = vids;
> + refcnt_work->refs = refs;
> + refcnt_work->nr_vids = nr_vids;
> + refcnt_work->new_vids = xcalloc(hdr->data_length,
> + sizeof(uint32_t));
> + memcpy(refcnt_work->new_vids, new_vids, hdr->data_length);
> +
> + refcnt_work->offset = offset;
> + refcnt_work->start = start;
> +
> +
> + refcnt_work->work.fn = async_update_obj_refcnt_work;
> + refcnt_work->work.done = async_update_obj_refcnt_done;
> +
> + queue_work(sys->io_wqueue, &refcnt_work->work);
> }
> +
> out:
> + free(zeroed_refs);
>
> - free(vids);
> - free(refs);
> return ret;
> }
>
> --
> 1.9.1
>
More information about the sheepdog
mailing list