[sheepdog] [PATCH v3 4/9] sheep: decrement generational reference count on copy-on-write
Hitoshi Mitake
mitake.hitoshi at gmail.com
Thu Feb 27 13:23:27 CET 2014
At Thu, 27 Feb 2014 17:58:37 +0800,
Liu Yuan wrote:
>
> On Sun, Feb 23, 2014 at 02:28:23PM +0900, Hitoshi Mitake wrote:
> > This decrements a reference count of the old data object when
> > allocating a new data object on CoW.
> >
> > Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> > Cc: Valerio Pachera <sirio81 at gmail.com>
> > Cc: Alessandro Bolgia <alessandro at extensys.it>
> > Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> > ---
> > sheep/gateway.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 106 insertions(+), 1 deletion(-)
> >
> > diff --git a/sheep/gateway.c b/sheep/gateway.c
> > index bfd3912..5a3d333 100644
> > --- a/sheep/gateway.c
> > +++ b/sheep/gateway.c
> > @@ -571,6 +571,85 @@ out:
> > return err_ret;
> > }
> >
> > +static int prepare_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> > + struct generation_reference *refs)
> > +{
> > + int ret;
> > + size_t nr_vids = hdr->data_length / sizeof(*vids);
> > + uint64_t offset;
> > + int start;
> > +
> > + offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> > + start = offset / sizeof(*vids);
> > +
> > + ret = sd_read_object(hdr->obj.oid, (char *)vids,
> > + nr_vids * sizeof(vids[0]),
> > + offsetof(struct sd_inode, data_vdi_id[start]));
> > + if (ret != SD_RES_SUCCESS) {
> > + sd_err("failed to read vdi, %" PRIx64, hdr->obj.oid);
> > + return ret;
> > + }
> > +
> > + ret = sd_read_object(hdr->obj.oid, (char *)refs,
> > + nr_vids * sizeof(refs[0]),
> > + offsetof(struct sd_inode, data_ref[start]));
> > + if (ret != SD_RES_SUCCESS) {
> > + sd_err("failed to read vdi, %" PRIx64, hdr->obj.oid);
> > + return ret;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +/*
> > + * This function decreases a refcnt of vid_to_data_oid(old_vid, idx) and
> > + * increases one of vid_to_data_oid(new_vid, idx)
> > + */
> > +static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> > + uint32_t *new_vids,
> > + struct generation_reference *refs)
> > +{
> > + int i, start, ret = SD_RES_SUCCESS;
> > + size_t nr_vids = hdr->data_length / sizeof(*vids);
> > + uint64_t offset;
> > +
> > + offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> > + start = offset / sizeof(*vids);
> > +
> > + for (i = 0; i < nr_vids; i++) {
> > + if (vids[i] == 0 || vids[i] == new_vids[i])
> > + continue;
> > +
> > + ret = sd_dec_object_refcnt(vid_to_data_oid(vids[i], i + start),
> > + refs[i].generation, refs[i].count);
> > + if (ret != SD_RES_SUCCESS)
> > + sd_err("fail, %d", ret);
> > +
> > + refs[i].generation = 0;
> > + refs[i].count = 0;
> > + }
> > +
> > + return sd_write_object(hdr->obj.oid, (char *)refs,
> > + nr_vids * sizeof(*refs),
> > + offsetof(struct sd_inode,
> > + data_ref) + start * sizeof(*refs),
> > + false);
> > +}
> > +
> > +/*
> > + * return true if the request updates a data_vdi_id field of a vdi object
> > + *
> > + * XXX: we assume that VMs don't update the inode header and the data_vdi_id
> > + * field at the same time.
> > + */
> > +static bool is_data_vid_update(const struct sd_req *hdr)
> > +{
> > + return is_vdi_obj(hdr->obj.oid) &&
> > + SD_INODE_HEADER_SIZE <= hdr->obj.offset &&
> > + hdr->obj.offset + hdr->data_length <=
> > + offsetof(struct sd_inode, data_ref);
> > +}
> > +
> > int gateway_read_obj(struct request *req)
> > {
> > uint64_t oid = req->rq.obj.oid;
> > @@ -587,6 +666,10 @@ int gateway_read_obj(struct request *req)
> > int gateway_write_obj(struct request *req)
> > {
> > uint64_t oid = req->rq.obj.oid;
> > + int ret;
> > + struct sd_req *hdr = &req->rq;
> > + uint32_t *vids = NULL, *new_vids = req->data;
> > + struct generation_reference *refs = NULL;
> >
> > if (oid_is_readonly(oid))
> > return SD_RES_READONLY;
> > @@ -594,7 +677,29 @@ int gateway_write_obj(struct request *req)
> > if (!bypass_object_cache(req))
> > return object_cache_handle_request(req);
> >
> > - return gateway_forward_request(req);
> > + if (is_data_vid_update(hdr)) {
> > + size_t nr_vids = hdr->data_length / sizeof(*vids);
> > +
> > + /* read the previous vids to discard their references later */
> > + vids = xzalloc(sizeof(*vids) * nr_vids);
> > + refs = xzalloc(sizeof(*refs) * nr_vids);
> > + ret = prepare_obj_refcnt(hdr, vids, refs);
> > + if (ret != SD_RES_SUCCESS)
> > + goto out;
> > + }
> > +
>
> Does this mean that even hyper volumes, which don't make use of the
> generational reference algorithm, are also negatively affected on writes?
>
> Will non-snapshot users (http and nfs) be affected too? I am wondering
> if we can skip the ref stuff for non-snapshot VDIs completely.
For non-snapshot users (hypervolume, http, nfs), the above dereferencing of
objects is equivalent to a simple removal. There are no side effects.
Thanks,
Hitoshi
More information about the sheepdog
mailing list