[sheepdog] [Qemu-devel] [PATCH v3] sheepdog: selectable object size support

Markus Armbruster armbru at redhat.com
Mon Jan 26 10:18:55 CET 2015


Kevin Wolf <kwolf at redhat.com> writes:

> Am 23.01.2015 um 09:24 hat Teruaki Ishizaki geschrieben:
>> Previously, qemu block driver of sheepdog used hard-coded VDI object size.
>> This patch enables users to handle "block_size_shift" value for
>> calculating VDI object size.
>> 
>> When you start qemu, you don't need to specify additional command option.
>> 
>> But when you create the VDI which doesn't have default object size
>> with qemu-img command, you specify block_size_shift option.
>> 
>> If you want to create a VDI of 8MB(1 << 23) object size,
>> you need to specify following command option.
>> 
>>  # qemu-img create -o block_size_shift=23 sheepdog:test1 100M
>> 
>> In addition, when you don't specify qemu-img command option,
>> a default value of sheepdog cluster is used for creating VDI.
>> 
>>  # qemu-img create sheepdog:test2 100M
>> 
>> Signed-off-by: Teruaki Ishizaki <ishizaki.teruaki at lab.ntt.co.jp>
>> ---
>> V3:
>>  - Delete the needless operation of buffer.
>>  - Delete the needless operations of request header
>>    for SD_OP_GET_CLUSTER_DEFAULT.
>>  - Fix coding style problems.
>> 
>> V2:
>>  - Fix coding style problem (white space).
>>  - Add members, store_policy and block_size_shift to struct SheepdogVdiReq
>>  - Initialize request header to use block_size_shift specified by user.
>> ---
>>  block/sheepdog.c          |  140 ++++++++++++++++++++++++++++++++++++++-------
>>  include/block/block_int.h |    1 +
>>  2 files changed, 119 insertions(+), 22 deletions(-)
>> 
>> diff --git a/block/sheepdog.c b/block/sheepdog.c
>> index be3176f..c9f06db 100644
>> --- a/block/sheepdog.c
>> +++ b/block/sheepdog.c
>> @@ -37,6 +37,7 @@
>>  #define SD_OP_READ_VDIS      0x15
>>  #define SD_OP_FLUSH_VDI      0x16
>>  #define SD_OP_DEL_VDI        0x17
>> +#define SD_OP_GET_CLUSTER_DEFAULT   0x18
>>  
>>  #define SD_FLAG_CMD_WRITE    0x01
>>  #define SD_FLAG_CMD_COW      0x02
>> @@ -167,7 +168,8 @@ typedef struct SheepdogVdiReq {
>>      uint32_t base_vdi_id;
>>      uint8_t copies;
>>      uint8_t copy_policy;
>> -    uint8_t reserved[2];
>> +    uint8_t store_policy;
>> +    uint8_t block_size_shift;
>>      uint32_t snapid;
>>      uint32_t type;
>>      uint32_t pad[2];
>> @@ -186,6 +188,21 @@ typedef struct SheepdogVdiRsp {
>>      uint32_t pad[5];
>>  } SheepdogVdiRsp;
>>  
>> +typedef struct SheepdogClusterRsp {
>> +    uint8_t proto_ver;
>> +    uint8_t opcode;
>> +    uint16_t flags;
>> +    uint32_t epoch;
>> +    uint32_t id;
>> +    uint32_t data_length;
>> +    uint32_t result;
>> +    uint8_t nr_copies;
>> +    uint8_t copy_policy;
>> +    uint8_t block_size_shift;
>> +    uint8_t __pad1;
>> +    uint32_t __pad2[6];
>> +} SheepdogClusterRsp;
>> +
>>  typedef struct SheepdogInode {
>>      char name[SD_MAX_VDI_LEN];
>>      char tag[SD_MAX_VDI_TAG_LEN];
>> @@ -1544,6 +1561,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
>>      hdr.vdi_size = s->inode.vdi_size;
>>      hdr.copy_policy = s->inode.copy_policy;
>>      hdr.copies = s->inode.nr_copies;
>> +    hdr.block_size_shift = s->inode.block_size_shift;
>>  
>>      ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
>>  
>> @@ -1569,9 +1587,11 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
>>  static int sd_prealloc(const char *filename, Error **errp)
>>  {
>>      BlockDriverState *bs = NULL;
>> +    BDRVSheepdogState *base = NULL;
>>      uint32_t idx, max_idx;
>> +    uint32_t object_size;
>>      int64_t vdi_size;
>> -    void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
>> +    void *buf = NULL;
>>      int ret;
>>  
>>      ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
>> @@ -1585,18 +1605,23 @@ static int sd_prealloc(const char *filename, Error **errp)
>>          ret = vdi_size;
>>          goto out;
>>      }
>> -    max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
>> +
>> +    base = bs->opaque;
>> +    object_size = (UINT32_C(1) << base->inode.block_size_shift);
>> +    buf = g_malloc0(object_size);
>
> If I understand correctly, block_size_shift can be up to 31, i.e. this
> is a 2 GB allocation. Do you really think this is a good idea?
>
> At least use g_try_malloc0() here, so that a memory allocation failure
> doesn't crash qemu. (Same goes for all potentially huge allocations that
> you make in the whole codebase.)
>
>> +    max_idx = DIV_ROUND_UP(vdi_size, object_size);
>>  
>>      for (idx = 0; idx < max_idx; idx++) {
>>          /*
>>           * The created image can be a cloned image, so we need to read
>>           * a data from the source image.
>>           */
>> -        ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
>> +        ret = bdrv_pread(bs, idx * object_size, buf, object_size);
>>          if (ret < 0) {
>>              goto out;
>>          }
>> -        ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
>> +        ret = bdrv_pwrite(bs, idx * object_size, buf, object_size);
>>          if (ret < 0) {
>>              goto out;
>>          }
>> @@ -1610,7 +1635,9 @@ out_with_err_set:
>>      if (bs) {
>>          bdrv_unref(bs);
>>      }
>> -    g_free(buf);
>> +    if (buf) {
>> +        g_free(buf);
>> +    }
>
> This is unnecessary. g_free(NULL) is valid, it does nothing.
>
>>      return ret;
>>  }
>> @@ -1669,6 +1696,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
>>      return 0;
>>  }
>>  
>> +static int parse_block_size_shift(BDRVSheepdogState *s, const char *opt)
>> +{
>> +    struct SheepdogInode *inode = &s->inode;
>> +    inode->block_size_shift = (uint8_t)atoi(opt);
>
> atoi() is best avoided, it has poor error handling.
>
> But I think you don't need this parse function at all, see below.
>
>> +    if (inode->block_size_shift < 20 || inode->block_size_shift > 31) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static int sd_create(const char *filename, QemuOpts *opts,
>>                       Error **errp)
>>  {
>> @@ -1679,6 +1717,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
>>      BDRVSheepdogState *s;
>>      char tag[SD_MAX_VDI_TAG_LEN];
>>      uint32_t snapid;
>> +    uint64_t max_vdi_size;
>>      bool prealloc = false;
>>  
>>      s = g_new0(BDRVSheepdogState, 1);
>> @@ -1718,10 +1757,15 @@ static int sd_create(const char *filename, QemuOpts *opts,
>>          }
>>      }
>>  
>> -    if (s->inode.vdi_size > SD_MAX_VDI_SIZE) {
>> -        error_setg(errp, "too big image size");
>> -        ret = -EINVAL;
>> -        goto out;
>> +    g_free(buf);
>> +    buf = qemu_opt_get_del(opts, BLOCK_OPT_BLOCK_SIZE_SHIFT);
>> +    if (buf) {
>> +        ret = parse_block_size_shift(s, buf);
>> +        if (ret < 0) {
>> +            error_setg(errp, "Invalid block_size_shift:"
>> +                             " '%s'. please use 20-31", buf);
>> +            goto out;
>> +        }
>>      }
>
> You could use qemu_opt_get_number_del() here and get a number directly
> from QemuOpts that you don't have to parse any more, if you defined the
> option as QEMU_OPT_NUMBER instead of QEMU_OPT_STRING.

s/could/should/

>>      if (backing_file) {
>> @@ -1757,6 +1801,45 @@ static int sd_create(const char *filename,
>> QemuOpts *opts,
>>      }
>>  
>>      s->aio_context = qemu_get_aio_context();
>> +
>> +    /* if block_size_shift is not specified, get cluster default value */
>> +    if (s->inode.block_size_shift == 0) {
>> +        SheepdogVdiReq hdr;
>> +        SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
>> +        Error *local_err = NULL;
>> +        int fd;
>> +        unsigned int wlen = 0, rlen = 0;
>> +
>> +        fd = connect_to_sdog(s, &local_err);
>> +        if (fd < 0) {
>> +            error_report("%s", error_get_pretty(local_err));
>> +            error_free(local_err);
>> +            ret = -EIO;
>> +            goto out;
>> +        }
>> +
>> +        memset(&hdr, 0, sizeof(hdr));
>> +        hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
>> +        hdr.proto_ver = SD_PROTO_VER;
>> +
>> +        ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
>> +                     NULL, &wlen, &rlen);
>> +        closesocket(fd);
>> +        if (ret) {
>> +            error_setg_errno(errp, -ret, "failed to get cluster default");
>> +            goto out;
>> +        }
>> +        s->inode.block_size_shift = rsp->block_size_shift;
>> +    }
>> +
>> + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) *
>> MAX_DATA_OBJS;
>> +
>> +    if (s->inode.vdi_size > max_vdi_size) {
>> +        error_setg(errp, "too big image size");
>
> Please capitalise the first word in an error messages, i.e. "Too big
> image size".

Make it "Image too big", or even better include the actual limit in the
message.

>> +        ret = -EINVAL;
>> +        goto out;
>> +    }
>> +
>>      ret = do_sd_create(s, &vid, 0, errp);
>>      if (ret) {
>>          goto out;
>
> Kevin



More information about the sheepdog mailing list