[sheepdog] [PATCH v4] sheepdog: selectable object size support

Liu Yuan namei.unix at gmail.com
Tue Feb 10 04:10:51 CET 2015


On Tue, Jan 27, 2015 at 05:35:27PM +0900, Teruaki Ishizaki wrote:
> Previously, qemu block driver of sheepdog used hard-coded VDI object size.
> This patch enables users to handle "block_size_shift" value for
> calculating VDI object size.
> 
> When you start qemu, you don't need to specify additional command option.
> 
> But when you create the VDI which doesn't have default object size
> with qemu-img command, you specify block_size_shift option.
> 
> If you want to create a VDI of 8MB(1 << 23) object size,
> you need to specify following command option.
> 
>  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M
> 
> In addition, when you don't specify qemu-img command option,
> a default value of sheepdog cluster is used for creating VDI.
> 
>  # qemu-img create sheepdog:test2 100M
> 
> Signed-off-by: Teruaki Ishizaki <ishizaki.teruaki at lab.ntt.co.jp>
> ---
> V4:
>  - Limit a read/write buffer size for creating a preallocated VDI.
>  - Replace a parse function for the block_size_shift option.
>  - Fix an error message.
> 
> V3:
>  - Delete the needless operation of buffer.
>  - Delete the needless operations of request header.
>    for SD_OP_GET_CLUSTER_DEFAULT.
>  - Fix coding style problems.
> 
> V2:
>  - Fix coding style problem (white space).
>  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq.
>  - Initialize request header to use block_size_shift specified by user.
> ---
>  block/sheepdog.c          |  138 ++++++++++++++++++++++++++++++++++++++-------
>  include/block/block_int.h |    1 +
>  2 files changed, 119 insertions(+), 20 deletions(-)
> 
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index be3176f..a43b947 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -37,6 +37,7 @@
>  #define SD_OP_READ_VDIS      0x15
>  #define SD_OP_FLUSH_VDI      0x16
>  #define SD_OP_DEL_VDI        0x17
> +#define SD_OP_GET_CLUSTER_DEFAULT   0x18
>  
>  #define SD_FLAG_CMD_WRITE    0x01
>  #define SD_FLAG_CMD_COW      0x02
> @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
>      uint32_t base_vdi_id;
>      uint8_t copies;
>      uint8_t copy_policy;
> -    uint8_t reserved[2];
> +    uint8_t store_policy;
> +    uint8_t block_size_shift;
>      uint32_t snapid;
>      uint32_t type;
>      uint32_t pad[2];
> @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
>      uint32_t pad[5];
>  } SheepdogVdiRsp;
>  
> +typedef struct SheepdogClusterRsp {
> +    uint8_t proto_ver;
> +    uint8_t opcode;
> +    uint16_t flags;
> +    uint32_t epoch;
> +    uint32_t id;
> +    uint32_t data_length;
> +    uint32_t result;
> +    uint8_t nr_copies;
> +    uint8_t copy_policy;
> +    uint8_t block_size_shift;
> +    uint8_t __pad1;
> +    uint32_t __pad2[6];
> +} SheepdogClusterRsp;
> +
>  typedef struct SheepdogInode {
>      char name[SD_MAX_VDI_LEN];
>      char tag[SD_MAX_VDI_TAG_LEN];
> @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
>      hdr.vdi_size = s->inode.vdi_size;
>      hdr.copy_policy = s->inode.copy_policy;
>      hdr.copies = s->inode.nr_copies;
> +    hdr.block_size_shift = s->inode.block_size_shift;
>  
>      ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
>  
> @@ -1569,9 +1587,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
>  static int sd_prealloc(const char *filename, Error **errp)
>  {
>      BlockDriverState *bs = NULL;
> +    BDRVSheepdogState *base = NULL;
> +    unsigned long buf_size;
>      uint32_t idx, max_idx;
> +    uint32_t object_size;
>      int64_t vdi_size;
> -    void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
> +    void *buf = NULL;
>      int ret;
>  
>      ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
> @@ -1585,18 +1606,24 @@ static int sd_prealloc(const char *filename, Error **errp)
>          ret = vdi_size;
>          goto out;
>      }
> -    max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
> +
> +    base = bs->opaque;
> +    object_size = (UINT32_C(1) << base->inode.block_size_shift);
> +    buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
> +    buf = g_malloc0(buf_size);
> +
> +    max_idx = DIV_ROUND_UP(vdi_size, buf_size);
>  
>      for (idx = 0; idx < max_idx; idx++) {
>          /*
>           * The created image can be a cloned image, so we need to read
>           * a data from the source image.
>           */
> -        ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
> +        ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
>          if (ret < 0) {
>              goto out;
>          }
> -        ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
> +        ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
>          if (ret < 0) {
>              goto out;
>          }
> @@ -1669,6 +1696,21 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
>      return 0;
>  }
>  
> +static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
> +{
> +    struct SheepdogInode *inode = &s->inode;
> +    inode->block_size_shift =
> +        (uint8_t)qemu_opt_get_number_del(opt, "block_size_shift", 0);
> +    if (inode->block_size_shift == 0) {
> +        /* block_size_shift is set for cluster default value by sheepdog */
> +        return 0;
> +    } else if (inode->block_size_shift < 20 || inode->block_size_shift > 31) {
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
>  static int sd_create(const char *filename, QemuOpts *opts,
>                       Error **errp)
>  {
> @@ -1679,6 +1721,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
>      BDRVSheepdogState *s;
>      char tag[SD_MAX_VDI_TAG_LEN];
>      uint32_t snapid;
> +    uint64_t max_vdi_size;
>      bool prealloc = false;
>  
>      s = g_new0(BDRVSheepdogState, 1);
> @@ -1718,9 +1761,10 @@ static int sd_create(const char *filename, QemuOpts *opts,
>          }
>      }
>  
> -    if (s->inode.vdi_size > SD_MAX_VDI_SIZE) {
> -        error_setg(errp, "too big image size");
> -        ret = -EINVAL;
> +    ret = parse_block_size_shift(s, opts);
> +    if (ret < 0) {
> +        error_setg(errp, "Invalid block_size_shift:"
> +                         " '%s'. please use 20-31", buf);
>          goto out;
>      }
>  
> @@ -1757,6 +1801,47 @@ static int sd_create(const char *filename, QemuOpts *opts,
>      }
>  
>      s->aio_context = qemu_get_aio_context();
> +
> +    /* if block_size_shift is not specified, get cluster default value */
> +    if (s->inode.block_size_shift == 0) {

Seems that we don't keep backward compatibility for new QEMU and old sheep that
doesn't support SD_OP_GET_CLUSTER_DEFAULT.

What will happen if old QEMU with new sheep that support block_size_shift? Most
distributions will ship the old stable qemu that wouldn't be aware of
block_size_shift.

I'd suggest we have 0 in block_size_shift represent 4MB to keep backward
compatiblity.

Thanks
Yuan



More information about the sheepdog mailing list