[sheepdog] [PATCH v2 06/11] sheep: decrement generational reference count on copy-on-write

Kai Zhang kyle at zelin.io
Sun Jun 30 08:20:18 CEST 2013


Based on my understanding, the new way of copy-on-write will be more costly than before.
We will do two more gateway write operations for each object.

Did you do some performance testing? Do you think this will drop performance?
(Although the performance drop would be covered by enabling cache, I still want
to know how it will affect the performance when cache is disabled)

Thanks,
Kyle


On Jun 19, 2013, at 1:14 AM, MORITA Kazutaka <morita.kazutaka at gmail.com> wrote:

> From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> 
> This decrements a reference count of the old data object when
> allocating a new data object on CoW.
> 
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
> sheep/gateway.c |  103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 102 insertions(+), 1 deletion(-)
> 
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index 08c58d2..6f12d14 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -320,9 +320,88 @@ static int gateway_forward_request(struct request *req)
> 	return err_ret;
> }
> 
> +static int prepare_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> +			      struct generation_reference *refs)
> +{
> +	int ret;
> +	size_t nr_vids = hdr->data_length / sizeof(*vids);
> +	uint64_t offset;
> +	int start;
> +
> +	offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> +	start = offset / sizeof(*vids);
> +
> +	ret = read_object(hdr->obj.oid, (char *)vids, nr_vids * sizeof(vids[0]),
> +			  offsetof(struct sd_inode, data_vdi_id[start]));
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_eprintf("failed to read vdi, %" PRIx64, hdr->obj.oid);
> +		return ret;
> +	}
> +	ret = read_object(hdr->obj.oid, (char *)refs, nr_vids * sizeof(refs[0]),
> +			  offsetof(struct sd_inode, data_ref[start]));
> +	if (ret != SD_RES_SUCCESS) {
> +		sd_eprintf("failed to read vdi, %" PRIx64, hdr->obj.oid);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * This function decreases a refcnt of vid_to_data_oid(old_vid, idx) and
> + * increases one of vid_to_data_oid(new_vid, idx)
> + */
> +static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
> +			     uint32_t *new_vids,
> +			     struct generation_reference *refs)
> +{
> +	int i, start, ret = SD_RES_SUCCESS;
> +	size_t nr_vids = hdr->data_length / sizeof(*vids);
> +	uint64_t offset;
> +
> +	offset = hdr->obj.offset - offsetof(struct sd_inode, data_vdi_id);
> +	start = offset / sizeof(*vids);
> +
> +	for (i = 0; i < nr_vids; i++) {
> +		if (vids[i] == 0 || vids[i] == new_vids[i])
> +			continue;
> +
> +		ret = dec_object_refcnt(vid_to_data_oid(vids[i], i + start),
> +					refs[i].generation, refs[i].count);
> +		if (ret != SD_RES_SUCCESS)
> +			sd_eprintf("fail, %d", ret);
> +
> +		refs[i].generation = 0;
> +		refs[i].count = 0;
> +	}
> +
> +	return write_object(hdr->obj.oid, (char *)refs, nr_vids * sizeof(*refs),
> +			    offsetof(struct sd_inode,
> +				     data_ref) + start * sizeof(*refs),
> +			    false);
> +}
> +
> +/*
> + * return true if the request updates a data_vdi_id field of a vdi object
> + *
> + * XXX: we assume that VMs don't update the inode header and the data_vdi_id
> + * field at the same time.
> + */
> +static bool is_data_vid_update(const struct sd_req *hdr)
> +{
> +	return is_vdi_obj(hdr->obj.oid) &&
> +		SD_INODE_HEADER_SIZE <= hdr->obj.offset &&
> +		hdr->obj.offset + hdr->data_length <=
> +			offsetof(struct sd_inode, data_ref);
> +}
> +
> int gateway_write_obj(struct request *req)
> {
> 	uint64_t oid = req->rq.obj.oid;
> +	int ret;
> +	struct sd_req *hdr = &req->rq;
> +	uint32_t *vids = NULL, *new_vids = req->data;
> +	struct generation_reference *refs = NULL;
> 
> 	if (oid_is_readonly(oid))
> 		return SD_RES_READONLY;
> @@ -330,7 +409,29 @@ int gateway_write_obj(struct request *req)
> 	if (!bypass_object_cache(req))
> 		return object_cache_handle_request(req);
> 
> -	return gateway_forward_request(req);
> +	if (is_data_vid_update(hdr)) {
> +		size_t nr_vids = hdr->data_length / sizeof(*vids);
> +
> +		/* read the previous vids to discard their references later */
> +		vids = xzalloc(sizeof(*vids) * nr_vids);
> +		refs = xzalloc(sizeof(*refs) * nr_vids);
> +		ret = prepare_obj_refcnt(hdr, vids, refs);
> +		if (ret != SD_RES_SUCCESS)
> +			goto out;
> +	}
> +
> +	ret = gateway_forward_request(req);
> +	if (ret != SD_RES_SUCCESS)
> +		goto out;
> +
> +	if (is_data_vid_update(hdr)) {
> +		sd_dprintf("udpate reference counts, %" PRIx64, hdr->obj.oid);
> +		update_obj_refcnt(hdr, vids, new_vids, refs);
> +	}
> +out:
> +	free(vids);
> +	free(refs);
> +	return ret;
> }
> 
> int gateway_create_and_write_obj(struct request *req)
> -- 
> 1.7.9.5
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog




More information about the sheepdog mailing list