[Sheepdog] [PATCH v5 5/8] sheep: add flush_vdi operation

Sun Mar 25 22:44:21 CEST 2012

At Sat, 24 Mar 2012 16:47:15 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly at taobao.com>
> 
> This is supposed to be initiated by Guest OS, but our collie
> friend might also like it.
> 
> The flush operation works on a per-VDI basis; that is, when one guest
> flushes its own dirty data, other guests are not affected.
> 
> - use forward_write_obj_req() to flush dirty objects
> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  include/sheepdog_proto.h |    1 +
>  sheep/object_cache.c     |   48 ++++++++++++++++++++++++++++++++++++++++++++-
>  sheep/ops.c              |   23 ++++++++++++++++++++++
>  sheep/sheep_priv.h       |    2 +
>  sheep/store.c            |    7 +++++-
>  5 files changed, 78 insertions(+), 3 deletions(-)
> 
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 84f12f1..6138108 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -29,6 +29,7 @@
>  #define SD_OP_RELEASE_VDI    0x13
>  #define SD_OP_GET_VDI_INFO   0x14
>  #define SD_OP_READ_VDIS      0x15
> +#define SD_OP_FLUSH_VDI      0x16
>  
>  #define SD_FLAG_CMD_WRITE    0x01
>  #define SD_FLAG_CMD_COW      0x02
> diff --git a/sheep/object_cache.c b/sheep/object_cache.c
> index 789a3ef..25b4cbc 100644
> --- a/sheep/object_cache.c
> +++ b/sheep/object_cache.c
> @@ -370,10 +370,54 @@ static uint64_t idx_to_oid(uint32_t vid, uint32_t idx)
>  		return vid_to_data_oid(vid, idx);
>  }
>  
> -static int push_cache_object(int fd, uint64_t oid)
> +static int push_cache_object(uint32_t vid, uint32_t idx)
>  {
> +	struct request fake_req;
> +	struct sd_obj_req *hdr = (struct sd_obj_req *)&fake_req.rq;
> +	void *buf;
> +	unsigned data_length;
> +	int ret = SD_RES_NO_MEM;
> +	uint64_t oid = idx_to_oid(vid, idx);
> +
>  	dprintf("%"PRIx64"\n", oid);
> -	return 0;
> +
> +	memset(&fake_req, 0, sizeof(fake_req));
> +	if (is_vdi_obj(oid))
> +		data_length = sizeof(struct sheepdog_inode);
> +	else
> +		data_length = SD_DATA_OBJ_SIZE;
> +
> +	buf = malloc(data_length);
> +	if (buf == NULL) {
> +		eprintf("failed to allocate memory\n");
> +		goto out;
> +	}
> +
> +	ret = read_cache_object(vid, idx, buf, data_length, 0);
> +	if (ret != SD_RES_SUCCESS)
> +		goto out;
> +
> +	hdr->offset = 0;
> +	hdr->data_length = data_length;
> +	hdr->opcode = SD_OP_WRITE_OBJ;
> +	hdr->flags = SD_FLAG_CMD_WRITE;
> +	hdr->oid = oid;
> +	hdr->copies = sys->nr_sobjs;
> +	hdr->epoch = sys->epoch;
> +	fake_req.data = buf;
> +	fake_req.op = get_sd_op(SD_OP_WRITE_OBJ);
> +	fake_req.entry = sys->vnodes;
> +	fake_req.nr_vnodes = sys->nr_vnodes;
> +	fake_req.nr_zones = get_zones_nr_from(sys->nodes, sys->nr_vnodes);
> +
> +	ret = forward_write_obj_req(&fake_req);
> +	if (ret != SD_RES_SUCCESS) {
> +		eprintf("failed to push object %x\n", ret);
> +		goto out;
> +	}
> +out:
> +	free(buf);
> +	return ret;
>  }
>  
>  /* Push back all the dirty objects to sheep cluster storage */
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 4a672be..3edd932 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -454,6 +454,24 @@ static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
>  	return ret;
>  }
>  
> +static int local_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *data)
> +{
> +	struct sd_obj_req *hdr = (struct sd_obj_req *)req;
> +	uint64_t oid = hdr->oid;
> +	uint32_t vid = oid_to_vid(oid);
> +	struct object_cache *cache = find_object_cache(vid);
> +
> +	/*
> +	 * We don't propagate the error to Guests because
> +	 * 1) Guests can't handle it for now

What does "Guests can't handle it for now" mean?  bdrv_co_flush_to_disk
needs the error value, doesn't it?

Thanks,

Kazutaka

> +	 * 2) Next flush request will flush again
> +	 */
> +	if (cache)
> +		object_cache_push(cache);
> +
> +	return SD_RES_SUCCESS;
> +}
> +
>  static struct sd_op_template sd_ops[] = {
>  
>  	/* cluster operations */
> @@ -568,6 +586,11 @@ static struct sd_op_template sd_ops[] = {
>  		.process_work = local_get_snap_file,
>  	},
>  
> +	[SD_OP_FLUSH_VDI] = {
> +		.type = SD_OP_TYPE_LOCAL,
> +		.process_work = local_flush_vdi,
> +	},
> +
>  	/* I/O operations */
>  	[SD_OP_CREATE_AND_WRITE_OBJ] = {
>  		.type = SD_OP_TYPE_IO,
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index af2f80b..afee5f7 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -246,6 +246,7 @@ int write_object_local(uint64_t oid, char *data, unsigned int datalen,
>  		       uint32_t epoch, int create);
>  int read_object_local(uint64_t oid, char *data, unsigned int datalen,
>  		      uint64_t offset, int copies, uint32_t epoch);
> +int forward_write_obj_req(struct request *req);
>  
>  int read_epoch(uint32_t *epoch, uint64_t *ctime,
>  	       struct sd_node *entries, int *nr_entries);
> @@ -285,6 +286,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
>  int start_recovery(uint32_t epoch);
>  void resume_recovery_work(void);
>  int is_recoverying_oid(uint64_t oid);
> +int node_in_recovery(void);
>  
>  int write_object(struct sd_vnode *e,
>  		 int vnodes, int zones, uint32_t node_version,
> diff --git a/sheep/store.c b/sheep/store.c
> index d3127d6..8546be4 100644
> --- a/sheep/store.c
> +++ b/sheep/store.c
> @@ -288,7 +288,7 @@ out:
>  	return ret;
>  }
>  
> -static int forward_write_obj_req(struct request *req)
> +int forward_write_obj_req(struct request *req)
>  {
>  	int i, n, nr, fd, ret, pollret;
>  	unsigned wlen;
> @@ -1512,6 +1512,11 @@ void resume_recovery_work(void)
>  	queue_work(sys->recovery_wqueue, &rw->work);
>  }
>  
> +int node_in_recovery(void)
> +{
> +	return !!recovering_work;
> +}
> +
>  int is_recoverying_oid(uint64_t oid)
>  {
>  	uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
> -- 
> 1.7.8.2
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog