[sheepdog] [PATCH v2] sheep/http: fix parallely append-upload problem

Robin Dong robin.k.dong at gmail.com
Fri Sep 19 08:33:42 CEST 2014


This patch has been commited to sheep/http, master branch

2014-09-19 14:30 GMT+08:00 Robin Dong <robin.k.dong at gmail.com>:

> From: Robin Dong <sanbai at taobao.com>
>
> The 'append' opreation in sheepdog contains:
>
> 1. lock(data_vid)
> 2. read onode
> 3. allocate space and set o_extent.data_len to zero
> 4. write onode down
> 5. unlock(data_vid)
> 6. read uploading data and set o_extent.data_len in terms of
> req->data_length
> 7. write onode down
>
> We unlock(data_vid) too early to protect the updating of
> 'o_extent.data_len'
> so one process may overwrite the space allocated by another process.
> To solve this problem, we move set operation of 'data_len' into lock region
> and modify the code to write data_vid.
>
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
> v2-->v1:
>   1. remove faulty modification of md.c
>
>  sheep/http/kv.c | 106
> ++++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 64 insertions(+), 42 deletions(-)
>
> diff --git a/sheep/http/kv.c b/sheep/http/kv.c
> index 04ee462..4611e05 100644
> --- a/sheep/http/kv.c
> +++ b/sheep/http/kv.c
> @@ -727,8 +727,11 @@ static int onode_allocate_extents(struct kv_onode
> *onode,
>                  * if we can put whole request data into extra space of
> last
>                  * o_extent, it don't need to allocate new extent.
>                  */
> -               if (req->data_length <= reserv_len)
> +               if (req->data_length <= reserv_len) {
> +                       onode->o_extent[idx - 1].data_len +=
> req->data_length;
>                         goto out;
> +               } else
> +                       onode->o_extent[idx - 1].data_len += reserv_len;
>         }
>         count = DIV_ROUND_UP((req->data_length - reserv_len),
> SD_DATA_OBJ_SIZE);
>         sys->cdrv->lock(data_vid);
> @@ -751,52 +754,22 @@ static int onode_allocate_extents(struct kv_onode
> *onode,
>
>         onode->o_extent[idx].start = start;
>         onode->o_extent[idx].count = count;
> -       onode->o_extent[idx].data_len = 0;
> +       onode->o_extent[idx].data_len = req->data_length - reserv_len;
>         onode->nr_extent++;
>  out:
>         return ret;
>  }
>
> -static int onode_populate_extents(struct kv_onode *onode,
> -                                 struct http_request *req)
> +static int do_vdi_write(struct http_request *req, uint32_t data_vid,
> +                       uint64_t offset, uint64_t total, char *data_buf,
> +                       bool create)
>  {
> -       struct onode_extent *ext;
> -       struct onode_extent *last_ext = onode->o_extent + onode->nr_extent
> - 1;
> -       ssize_t size;
> -       uint64_t done = 0, total, offset = 0, reserv_len;
> -       uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
> +       uint64_t done = 0, size;
>         int ret = SD_RES_SUCCESS;
> -       char *data_buf = NULL;
> -       uint32_t data_vid = onode->data_vid;
> -       bool create = true;
> -
> -       data_buf = xmalloc(write_buffer_size);
> -       if (last_ext->data_len == 0 && onode->nr_extent == 1) {
> -               offset = last_ext->start * SD_DATA_OBJ_SIZE +
> -                        last_ext->data_len;
> -               last_ext->data_len += req->data_length;
> -       } else if (last_ext->data_len > 0) {
> -               offset = last_ext->start * SD_DATA_OBJ_SIZE +
> -                        last_ext->data_len;
> -               last_ext->data_len += req->data_length;
> -               create = false;
> -       } else {
> -               ext = last_ext - 1;
> -               reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len;
> -               offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len;
> -               ext->data_len += reserv_len;
> -               last_ext->data_len = req->data_length - reserv_len;
> -               /*
> -                * if the previous oid has extra space, we don't need
> -                * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid.
> -                */
> -               if (reserv_len > 0)
> -                       create = false;
> -       }
>
> -       total = req->data_length;
>         while (done < total) {
> -               size = http_request_read(req, data_buf, write_buffer_size);
> +               size = http_request_read(req, data_buf,
> +                                        MIN(kv_rw_buffer, total - done));
>                 if (size <= 0) {
>                         sd_err("Failed to read http request: %ld", size);
>                         ret = SD_RES_EIO;
> @@ -804,17 +777,66 @@ static int onode_populate_extents(struct kv_onode
> *onode,
>                 }
>                 ret = vdi_read_write(data_vid, data_buf, size, offset,
>                                      false, create);
> -               sd_debug("vdi_write size: %"PRIu64", offset: %"
> -                        PRIu64", ret:%d", size, offset, ret);
> +               sd_debug("vdi_write offset: %"PRIu64", size: %" PRIu64
> +                        ", for %" PRIx32 "ret: %d", offset, size,
> +                        data_vid, ret);
>                 if (ret != SD_RES_SUCCESS) {
> -                       sd_err("Failed to write data object for %s, %s",
> -                              onode->name, sd_strerror(ret));
> +                       sd_err("Failed to write data object for %" PRIx32
> +                              ", %s", data_vid, sd_strerror(ret));
>                         goto out;
>                 }
>                 done += size;
>                 offset += size;
>         }
>  out:
> +       return ret;
> +}
> +
> +static int onode_populate_extents(struct kv_onode *onode,
> +                                 struct http_request *req)
> +{
> +       struct onode_extent *ext;
> +       struct onode_extent *last_ext = onode->o_extent + onode->nr_extent
> - 1;
> +       uint64_t total, offset = 0, reserv_len;
> +       uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
> +       int ret = SD_RES_SUCCESS;
> +       char *data_buf = NULL;
> +       uint32_t data_vid = onode->data_vid;
> +       bool create = true;
> +
> +       data_buf = xmalloc(write_buffer_size);
> +
> +       if (last_ext->data_len < req->data_length) {
> +               ext = last_ext - 1;
> +               reserv_len = (req->data_length - last_ext->data_len);
> +               offset = (ext->start + ext->count) * SD_DATA_OBJ_SIZE -
> +                        reserv_len;
> +               ret = do_vdi_write(req, data_vid, offset, reserv_len,
> +                                  data_buf, false);
> +               if (ret != SD_RES_SUCCESS) {
> +                       sd_err("Failed to do_vdi_write data_vid: %" PRIx32
> +                              ", offset: %" PRIx64 ", total: %" PRIx64
> +                              ", ret: %s", data_vid, offset, reserv_len,
> +                              sd_strerror(ret));
> +                       goto out;
> +               }
> +               offset = last_ext->start * SD_DATA_OBJ_SIZE;
> +               total = last_ext->data_len;
> +       } else {
> +               reserv_len = (last_ext->data_len - req->data_length);
> +               offset = last_ext->start * SD_DATA_OBJ_SIZE + reserv_len;
> +               total = req->data_length;
> +               if (last_ext->data_len > req->data_length)
> +                       create = false;
> +       }
> +
> +       ret = do_vdi_write(req, data_vid, offset, total, data_buf, create);
> +       if (ret != SD_RES_SUCCESS)
> +               sd_err("Failed to do_vdi_write data_vid: %" PRIx32
> +                      ", offset: %" PRIx64 ", total: %" PRIx64
> +                      ", ret: %s", data_vid, offset, total,
> +                      sd_strerror(ret));
> +out:
>         free(data_buf);
>         return ret;
>  }
> --
> 1.9.1
>
>


-- 
--
Best Regard
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20140919/482eee8f/attachment-0004.html>


More information about the sheepdog mailing list