[Sheepdog] [PATCH v5 5/8] sheep: add flush_vdi operation
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Sun Mar 25 22:44:21 CEST 2012
At Sat, 24 Mar 2012 16:47:15 +0800,
Liu Yuan wrote:
>
> From: Liu Yuan <tailai.ly at taobao.com>
>
> This is supposed to be initiated by Guest OS, but our collie
> friend might also like it.
>
> The flush operation works on a per-VDI basis; that is, when one guest
> flushes its own dirty data, other guests are not affected.
>
> - use forward_write_obj_req() to flush dirty objects
>
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
> include/sheepdog_proto.h | 1 +
> sheep/object_cache.c | 48 ++++++++++++++++++++++++++++++++++++++++++++-
> sheep/ops.c | 23 ++++++++++++++++++++++
> sheep/sheep_priv.h | 2 +
> sheep/store.c | 7 +++++-
> 5 files changed, 78 insertions(+), 3 deletions(-)
>
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 84f12f1..6138108 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -29,6 +29,7 @@
> #define SD_OP_RELEASE_VDI 0x13
> #define SD_OP_GET_VDI_INFO 0x14
> #define SD_OP_READ_VDIS 0x15
> +#define SD_OP_FLUSH_VDI 0x16
>
> #define SD_FLAG_CMD_WRITE 0x01
> #define SD_FLAG_CMD_COW 0x02
> diff --git a/sheep/object_cache.c b/sheep/object_cache.c
> index 789a3ef..25b4cbc 100644
> --- a/sheep/object_cache.c
> +++ b/sheep/object_cache.c
> @@ -370,10 +370,54 @@ static uint64_t idx_to_oid(uint32_t vid, uint32_t idx)
> return vid_to_data_oid(vid, idx);
> }
>
> -static int push_cache_object(int fd, uint64_t oid)
> +static int push_cache_object(uint32_t vid, uint32_t idx)
> {
> + struct request fake_req;
> + struct sd_obj_req *hdr = (struct sd_obj_req *)&fake_req.rq;
> + void *buf;
> + unsigned data_length;
> + int ret = SD_RES_NO_MEM;
> + uint64_t oid = idx_to_oid(vid, idx);
> +
> dprintf("%"PRIx64"\n", oid);
> - return 0;
> +
> + memset(&fake_req, 0, sizeof(fake_req));
> + if (is_vdi_obj(oid))
> + data_length = sizeof(struct sheepdog_inode);
> + else
> + data_length = SD_DATA_OBJ_SIZE;
> +
> + buf = malloc(data_length);
> + if (buf == NULL) {
> + eprintf("failed to allocate memory\n");
> + goto out;
> + }
> +
> + ret = read_cache_object(vid, idx, buf, data_length, 0);
> + if (ret != SD_RES_SUCCESS)
> + goto out;
> +
> + hdr->offset = 0;
> + hdr->data_length = data_length;
> + hdr->opcode = SD_OP_WRITE_OBJ;
> + hdr->flags = SD_FLAG_CMD_WRITE;
> + hdr->oid = oid;
> + hdr->copies = sys->nr_sobjs;
> + hdr->epoch = sys->epoch;
> + fake_req.data = buf;
> + fake_req.op = get_sd_op(SD_OP_WRITE_OBJ);
> + fake_req.entry = sys->vnodes;
> + fake_req.nr_vnodes = sys->nr_vnodes;
> + fake_req.nr_zones = get_zones_nr_from(sys->nodes, sys->nr_vnodes);
> +
> + ret = forward_write_obj_req(&fake_req);
> + if (ret != SD_RES_SUCCESS) {
> + eprintf("failed to push object %x\n", ret);
> + goto out;
> + }
> +out:
> + free(buf);
> + return ret;
> }
>
> /* Push back all the dirty objects to sheep cluster storage */
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 4a672be..3edd932 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -454,6 +454,24 @@ static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
> return ret;
> }
>
> +static int local_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *data)
> +{
> + struct sd_obj_req *hdr = (struct sd_obj_req *)req;
> + uint64_t oid = hdr->oid;
> + uint32_t vid = oid_to_vid(oid);
> + struct object_cache *cache = find_object_cache(vid);
> +
> + /*
> + * We don't propagate the error to Guests because
> + * 1) Guests can't handle it for now
What does "Guests can't handle it" mean? QEMU's bdrv_co_flush_to_disk
needs the error value, doesn't it?
Thanks,
Kazutaka
> + * 2) Next flush request will flush again
> + */
> + if (cache)
> + object_cache_push(cache);
> +
> + return SD_RES_SUCCESS;
> +}
> +
> static struct sd_op_template sd_ops[] = {
>
> /* cluster operations */
> @@ -568,6 +586,11 @@ static struct sd_op_template sd_ops[] = {
> .process_work = local_get_snap_file,
> },
>
> + [SD_OP_FLUSH_VDI] = {
> + .type = SD_OP_TYPE_LOCAL,
> + .process_work = local_flush_vdi,
> + },
> +
> /* I/O operations */
> [SD_OP_CREATE_AND_WRITE_OBJ] = {
> .type = SD_OP_TYPE_IO,
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index af2f80b..afee5f7 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -246,6 +246,7 @@ int write_object_local(uint64_t oid, char *data, unsigned int datalen,
> uint32_t epoch, int create);
> int read_object_local(uint64_t oid, char *data, unsigned int datalen,
> uint64_t offset, int copies, uint32_t epoch);
> +int forward_write_obj_req(struct request *req);
>
> int read_epoch(uint32_t *epoch, uint64_t *ctime,
> struct sd_node *entries, int *nr_entries);
> @@ -285,6 +286,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
> int start_recovery(uint32_t epoch);
> void resume_recovery_work(void);
> int is_recoverying_oid(uint64_t oid);
> +int node_in_recovery(void);
>
> int write_object(struct sd_vnode *e,
> int vnodes, int zones, uint32_t node_version,
> diff --git a/sheep/store.c b/sheep/store.c
> index d3127d6..8546be4 100644
> --- a/sheep/store.c
> +++ b/sheep/store.c
> @@ -288,7 +288,7 @@ out:
> return ret;
> }
>
> -static int forward_write_obj_req(struct request *req)
> +int forward_write_obj_req(struct request *req)
> {
> int i, n, nr, fd, ret, pollret;
> unsigned wlen;
> @@ -1512,6 +1512,11 @@ void resume_recovery_work(void)
> queue_work(sys->recovery_wqueue, &rw->work);
> }
>
> +int node_in_recovery(void)
> +{
> + return !!recovering_work;
> +}
> +
> int is_recoverying_oid(uint64_t oid)
> {
> uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
> --
> 1.7.8.2
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
More information about the sheepdog
mailing list