[sheepdog] [PATCH v3 4/9] sheep: decrement generational reference count on copy-on-write

Liu Yuan namei.unix at gmail.com
Thu Feb 27 10:58:37 CET 2014


On Sun, Feb 23, 2014 at 02:28:23PM +0900, Hitoshi Mitake wrote:
> This decrements a reference count of the old data object when
> allocating a new data object on CoW.
> 
> Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> Cc: Valerio Pachera <sirio81 at gmail.com>
> Cc: Alessandro Bolgia <alessandro at extensys.it>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
>  sheep/gateway.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 106 insertions(+), 1 deletion(-)
> 
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index bfd3912..5a3d333 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -571,6 +571,85 @@ out:
>  	return err_ret;
>  }
>  
> +static int prepare_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> +			      struct generation_reference *refs)
> +{
> +	int ret;
> +	size_t nr_vids = hdr->data_length / sizeof(*vids);
> +	uint64_t offset;
> +	int start;
> +
> +	offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> +	start = offset / sizeof(*vids);
> +
> +	ret = sd_read_object(hdr->obj.oid, (char *)vids,
> +			     nr_vids * sizeof(vids[0]),
> +			     offsetof(struct sd_inode, data_vdi_id[start]));
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_err("failed to read vdi, %" PRIx64, hdr->obj.oid);
> +		return ret;
> +	}
> +
> +	ret = sd_read_object(hdr->obj.oid, (char *)refs,
> +			     nr_vids * sizeof(refs[0]),
> +			     offsetof(struct sd_inode, data_ref[start]));
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_err("failed to read vdi, %" PRIx64, hdr->obj.oid);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * This function decreases a refcnt of vid_to_data_oid(old_vid, idx) and
> + * increases one of vid_to_data_oid(new_vid, idx)
> + */
> +static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> +			     uint32_t *new_vids,
> +			     struct generation_reference *refs)
> +{
> +	int i, start, ret = SD_RES_SUCCESS;
> +	size_t nr_vids = hdr->data_length / sizeof(*vids);
> +	uint64_t offset;
> +
> +	offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> +	start = offset / sizeof(*vids);
> +
> +	for (i = 0; i < nr_vids; i++) {
> +		if (vids[i] == 0 || vids[i] == new_vids[i])
> +			continue;
> +
> +		ret = sd_dec_object_refcnt(vid_to_data_oid(vids[i], i + start),
> +					   refs[i].generation, refs[i].count);
> +		if (ret != SD_RES_SUCCESS)
> +			sd_err("fail, %d", ret);
> +
> +		refs[i].generation = 0;
> +		refs[i].count = 0;
> +	}
> +
> +	return sd_write_object(hdr->obj.oid, (char *)refs,
> +			       nr_vids * sizeof(*refs),
> +			       offsetof(struct sd_inode,
> +					data_ref) + start * sizeof(*refs),
> +			       false);
> +}
> +
> +/*
> + * return true if the request updates a data_vdi_id field of a vdi object
> + *
> + * XXX: we assume that VMs don't update the inode header and the data_vdi_id
> + * field at the same time.
> + */
> +static bool is_data_vid_update(const struct sd_req *hdr)
> +{
> +	return is_vdi_obj(hdr->obj.oid) &&
> +		SD_INODE_HEADER_SIZE <= hdr->obj.offset &&
> +		hdr->obj.offset + hdr->data_length <=
> +			offsetof(struct sd_inode, data_ref);
> +}
> +
>  int gateway_read_obj(struct request *req)
>  {
>  	uint64_t oid = req->rq.obj.oid;
> @@ -587,6 +666,10 @@ int gateway_read_obj(struct request *req)
>  int gateway_write_obj(struct request *req)
>  {
>  	uint64_t oid = req->rq.obj.oid;
> +	int ret;
> +	struct sd_req *hdr = &req->rq;
> +	uint32_t *vids = NULL, *new_vids = req->data;
> +	struct generation_reference *refs = NULL;
>  
>  	if (oid_is_readonly(oid))
>  		return SD_RES_READONLY;
> @@ -594,7 +677,29 @@ int gateway_write_obj(struct request *req)
>  	if (!bypass_object_cache(req))
>  		return object_cache_handle_request(req);
>  
> -	return gateway_forward_request(req);
> +	if (is_data_vid_update(hdr)) {
> +		size_t nr_vids = hdr->data_length / sizeof(*vids);
> +
> +		/* read the previous vids to discard their references later */
> +		vids = xzalloc(sizeof(*vids) * nr_vids);
> +		refs = xzalloc(sizeof(*refs) * nr_vids);
> +		ret = prepare_obj_refcnt(hdr, vids, refs);
> +		if (ret != SD_RES_SUCCESS)
> +			goto out;
> +	}
> +

Does this mean that even hyper volumes, which don't make use of the generational
reference algorithm, are also negatively affected on writes?

Will non-snapshot users (http and nfs) be affected too? I am wondering
if we can skip the ref stuff completely for non-snapshot VDIs.

Thanks
Yuan



More information about the sheepdog mailing list