[sheepdog] [PATCH v2] sheep/http: fix parallely append-upload problem
Robin Dong
robin.k.dong at gmail.com
Fri Sep 19 08:33:42 CEST 2014
This patch has been commited to sheep/http, master branch
2014-09-19 14:30 GMT+08:00 Robin Dong <robin.k.dong at gmail.com>:
> From: Robin Dong <sanbai at taobao.com>
>
> The 'append' opreation in sheepdog contains:
>
> 1. lock(data_vid)
> 2. read onode
> 3. allocate space and set o_extent.data_len to zero
> 4. write onode down
> 5. unlock(data_vid)
> 6. read uploading data and set o_extent.data_len in terms of
> req->data_length
> 7. write onode down
>
> We unlock(data_vid) too early to protect the updating of
> 'o_extent.data_len'
> so one process may overwrite the space allocated by another process.
> To solve this problem, we move set operation of 'data_len' into lock region
> and modify the code to write data_vid.
>
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
> v2-->v1:
> 1. remove faulty modification of md.c
>
> sheep/http/kv.c | 106
> ++++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 64 insertions(+), 42 deletions(-)
>
> diff --git a/sheep/http/kv.c b/sheep/http/kv.c
> index 04ee462..4611e05 100644
> --- a/sheep/http/kv.c
> +++ b/sheep/http/kv.c
> @@ -727,8 +727,11 @@ static int onode_allocate_extents(struct kv_onode
> *onode,
> * if we can put whole request data into extra space of
> last
> * o_extent, it don't need to allocate new extent.
> */
> - if (req->data_length <= reserv_len)
> + if (req->data_length <= reserv_len) {
> + onode->o_extent[idx - 1].data_len +=
> req->data_length;
> goto out;
> + } else
> + onode->o_extent[idx - 1].data_len += reserv_len;
> }
> count = DIV_ROUND_UP((req->data_length - reserv_len),
> SD_DATA_OBJ_SIZE);
> sys->cdrv->lock(data_vid);
> @@ -751,52 +754,22 @@ static int onode_allocate_extents(struct kv_onode
> *onode,
>
> onode->o_extent[idx].start = start;
> onode->o_extent[idx].count = count;
> - onode->o_extent[idx].data_len = 0;
> + onode->o_extent[idx].data_len = req->data_length - reserv_len;
> onode->nr_extent++;
> out:
> return ret;
> }
>
> -static int onode_populate_extents(struct kv_onode *onode,
> - struct http_request *req)
> +static int do_vdi_write(struct http_request *req, uint32_t data_vid,
> + uint64_t offset, uint64_t total, char *data_buf,
> + bool create)
> {
> - struct onode_extent *ext;
> - struct onode_extent *last_ext = onode->o_extent + onode->nr_extent
> - 1;
> - ssize_t size;
> - uint64_t done = 0, total, offset = 0, reserv_len;
> - uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
> + uint64_t done = 0, size;
> int ret = SD_RES_SUCCESS;
> - char *data_buf = NULL;
> - uint32_t data_vid = onode->data_vid;
> - bool create = true;
> -
> - data_buf = xmalloc(write_buffer_size);
> - if (last_ext->data_len == 0 && onode->nr_extent == 1) {
> - offset = last_ext->start * SD_DATA_OBJ_SIZE +
> - last_ext->data_len;
> - last_ext->data_len += req->data_length;
> - } else if (last_ext->data_len > 0) {
> - offset = last_ext->start * SD_DATA_OBJ_SIZE +
> - last_ext->data_len;
> - last_ext->data_len += req->data_length;
> - create = false;
> - } else {
> - ext = last_ext - 1;
> - reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len;
> - offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len;
> - ext->data_len += reserv_len;
> - last_ext->data_len = req->data_length - reserv_len;
> - /*
> - * if the previous oid has extra space, we don't need
> - * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid.
> - */
> - if (reserv_len > 0)
> - create = false;
> - }
>
> - total = req->data_length;
> while (done < total) {
> - size = http_request_read(req, data_buf, write_buffer_size);
> + size = http_request_read(req, data_buf,
> + MIN(kv_rw_buffer, total - done));
> if (size <= 0) {
> sd_err("Failed to read http request: %ld", size);
> ret = SD_RES_EIO;
> @@ -804,17 +777,66 @@ static int onode_populate_extents(struct kv_onode
> *onode,
> }
> ret = vdi_read_write(data_vid, data_buf, size, offset,
> false, create);
> - sd_debug("vdi_write size: %"PRIu64", offset: %"
> - PRIu64", ret:%d", size, offset, ret);
> + sd_debug("vdi_write offset: %"PRIu64", size: %" PRIu64
> + ", for %" PRIx32 "ret: %d", offset, size,
> + data_vid, ret);
> if (ret != SD_RES_SUCCESS) {
> - sd_err("Failed to write data object for %s, %s",
> - onode->name, sd_strerror(ret));
> + sd_err("Failed to write data object for %" PRIx32
> + ", %s", data_vid, sd_strerror(ret));
> goto out;
> }
> done += size;
> offset += size;
> }
> out:
> + return ret;
> +}
> +
> +static int onode_populate_extents(struct kv_onode *onode,
> + struct http_request *req)
> +{
> + struct onode_extent *ext;
> + struct onode_extent *last_ext = onode->o_extent + onode->nr_extent
> - 1;
> + uint64_t total, offset = 0, reserv_len;
> + uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
> + int ret = SD_RES_SUCCESS;
> + char *data_buf = NULL;
> + uint32_t data_vid = onode->data_vid;
> + bool create = true;
> +
> + data_buf = xmalloc(write_buffer_size);
> +
> + if (last_ext->data_len < req->data_length) {
> + ext = last_ext - 1;
> + reserv_len = (req->data_length - last_ext->data_len);
> + offset = (ext->start + ext->count) * SD_DATA_OBJ_SIZE -
> + reserv_len;
> + ret = do_vdi_write(req, data_vid, offset, reserv_len,
> + data_buf, false);
> + if (ret != SD_RES_SUCCESS) {
> + sd_err("Failed to do_vdi_write data_vid: %" PRIx32
> + ", offset: %" PRIx64 ", total: %" PRIx64
> + ", ret: %s", data_vid, offset, reserv_len,
> + sd_strerror(ret));
> + goto out;
> + }
> + offset = last_ext->start * SD_DATA_OBJ_SIZE;
> + total = last_ext->data_len;
> + } else {
> + reserv_len = (last_ext->data_len - req->data_length);
> + offset = last_ext->start * SD_DATA_OBJ_SIZE + reserv_len;
> + total = req->data_length;
> + if (last_ext->data_len > req->data_length)
> + create = false;
> + }
> +
> + ret = do_vdi_write(req, data_vid, offset, total, data_buf, create);
> + if (ret != SD_RES_SUCCESS)
> + sd_err("Failed to do_vdi_write data_vid: %" PRIx32
> + ", offset: %" PRIx64 ", total: %" PRIx64
> + ", ret: %s", data_vid, offset, total,
> + sd_strerror(ret));
> +out:
> free(data_buf);
> return ret;
> }
> --
> 1.9.1
>
>
--
--
Best Regard
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20140919/482eee8f/attachment-0004.html>
More information about the sheepdog
mailing list