<div dir="ltr">This patch has been commited to sheep/http, master branch</div><div class="gmail_extra"><br><div class="gmail_quote">2014-09-19 14:30 GMT+08:00 Robin Dong <span dir="ltr"><<a href="mailto:robin.k.dong@gmail.com" target="_blank">robin.k.dong@gmail.com</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>><br>
<br>
The 'append' opreation in sheepdog contains:<br>
<br>
1. lock(data_vid)<br>
2. read onode<br>
3. allocate space and set o_extent.data_len to zero<br>
4. write onode down<br>
5. unlock(data_vid)<br>
6. read uploading data and set o_extent.data_len in terms of req->data_length<br>
7. write onode down<br>
<br>
We unlock(data_vid) too early to protect the updating of 'o_extent.data_len'<br>
so one process may overwrite the space allocated by another process.<br>
To solve this problem, we move set operation of 'data_len' into lock region<br>
and modify the code to write data_vid.<br>
<br>
Signed-off-by: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>><br>
---<br>
v2-->v1:<br>
  1. remove faulty modification of md.c<br>
<br>
 sheep/http/kv.c | 106 ++++++++++++++++++++++++++++++++++----------------------<br>
 1 file changed, 64 insertions(+), 42 deletions(-)<br>
<br>
diff --git a/sheep/http/kv.c b/sheep/http/kv.c<br>
index 04ee462..4611e05 100644<br>
--- a/sheep/http/kv.c<br>
+++ b/sheep/http/kv.c<br>
@@ -727,8 +727,11 @@ static int onode_allocate_extents(struct kv_onode *onode,<br>
                 * if we can put whole request data into extra space of last<br>
                 * o_extent, it don't need to allocate new extent.<br>
                 */<br>
-               if (req->data_length <= reserv_len)<br>
+               if (req->data_length <= reserv_len) {<br>
+                       onode->o_extent[idx - 1].data_len += req->data_length;<br>
                        goto out;<br>
+               } else<br>
+                       onode->o_extent[idx - 1].data_len += reserv_len;<br>
        }<br>
        count = DIV_ROUND_UP((req->data_length - reserv_len), SD_DATA_OBJ_SIZE);<br>
        sys->cdrv->lock(data_vid);<br>
@@ -751,52 +754,22 @@ static int onode_allocate_extents(struct kv_onode *onode,<br>
<br>
        onode->o_extent[idx].start = start;<br>
        onode->o_extent[idx].count = count;<br>
-       onode->o_extent[idx].data_len = 0;<br>
+       onode->o_extent[idx].data_len = req->data_length - reserv_len;<br>
        onode->nr_extent++;<br>
 out:<br>
        return ret;<br>
 }<br>
<br>
-static int onode_populate_extents(struct kv_onode *onode,<br>
-                                 struct http_request *req)<br>
+static int do_vdi_write(struct http_request *req, uint32_t data_vid,<br>
+                       uint64_t offset, uint64_t total, char *data_buf,<br>
+                       bool create)<br>
 {<br>
-       struct onode_extent *ext;<br>
-       struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;<br>
-       ssize_t size;<br>
-       uint64_t done = 0, total, offset = 0, reserv_len;<br>
-       uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);<br>
+       uint64_t done = 0, size;<br>
        int ret = SD_RES_SUCCESS;<br>
-       char *data_buf = NULL;<br>
-       uint32_t data_vid = onode->data_vid;<br>
-       bool create = true;<br>
-<br>
-       data_buf = xmalloc(write_buffer_size);<br>
-       if (last_ext->data_len == 0 && onode->nr_extent == 1) {<br>
-               offset = last_ext->start * SD_DATA_OBJ_SIZE +<br>
-                        last_ext->data_len;<br>
-               last_ext->data_len += req->data_length;<br>
-       } else if (last_ext->data_len > 0) {<br>
-               offset = last_ext->start * SD_DATA_OBJ_SIZE +<br>
-                        last_ext->data_len;<br>
-               last_ext->data_len += req->data_length;<br>
-               create = false;<br>
-       } else {<br>
-               ext = last_ext - 1;<br>
-               reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len;<br>
-               offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len;<br>
-               ext->data_len += reserv_len;<br>
-               last_ext->data_len = req->data_length - reserv_len;<br>
-               /*<br>
-                * if the previous oid has extra space, we don't need<br>
-                * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid.<br>
-                */<br>
-               if (reserv_len > 0)<br>
-                       create = false;<br>
-       }<br>
<br>
-       total = req->data_length;<br>
        while (done < total) {<br>
-               size = http_request_read(req, data_buf, write_buffer_size);<br>
+               size = http_request_read(req, data_buf,<br>
+                                        MIN(kv_rw_buffer, total - done));<br>
                if (size <= 0) {<br>
                        sd_err("Failed to read http request: %ld", size);<br>
                        ret = SD_RES_EIO;<br>
@@ -804,17 +777,66 @@ static int onode_populate_extents(struct kv_onode *onode,<br>
                }<br>
                ret = vdi_read_write(data_vid, data_buf, size, offset,<br>
                                     false, create);<br>
-               sd_debug("vdi_write size: %"PRIu64", offset: %"<br>
-                        PRIu64", ret:%d", size, offset, ret);<br>
+               sd_debug("vdi_write offset: %"PRIu64", size: %" PRIu64<br>
+                        ", for %" PRIx32 "ret: %d", offset, size,<br>
+                        data_vid, ret);<br>
                if (ret != SD_RES_SUCCESS) {<br>
-                       sd_err("Failed to write data object for %s, %s",<br>
-                              onode->name, sd_strerror(ret));<br>
+                       sd_err("Failed to write data object for %" PRIx32<br>
+                              ", %s", data_vid, sd_strerror(ret));<br>
                        goto out;<br>
                }<br>
                done += size;<br>
                offset += size;<br>
        }<br>
 out:<br>
+       return ret;<br>
+}<br>
+<br>
+static int onode_populate_extents(struct kv_onode *onode,<br>
+                                 struct http_request *req)<br>
+{<br>
+       struct onode_extent *ext;<br>
+       struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;<br>
+       uint64_t total, offset = 0, reserv_len;<br>
+       uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);<br>
+       int ret = SD_RES_SUCCESS;<br>
+       char *data_buf = NULL;<br>
+       uint32_t data_vid = onode->data_vid;<br>
+       bool create = true;<br>
+<br>
+       data_buf = xmalloc(write_buffer_size);<br>
+<br>
+       if (last_ext->data_len < req->data_length) {<br>
+               ext = last_ext - 1;<br>
+               reserv_len = (req->data_length - last_ext->data_len);<br>
+               offset = (ext->start + ext->count) * SD_DATA_OBJ_SIZE -<br>
+                        reserv_len;<br>
+               ret = do_vdi_write(req, data_vid, offset, reserv_len,<br>
+                                  data_buf, false);<br>
+               if (ret != SD_RES_SUCCESS) {<br>
+                       sd_err("Failed to do_vdi_write data_vid: %" PRIx32<br>
+                              ", offset: %" PRIx64 ", total: %" PRIx64<br>
+                              ", ret: %s", data_vid, offset, reserv_len,<br>
+                              sd_strerror(ret));<br>
+                       goto out;<br>
+               }<br>
+               offset = last_ext->start * SD_DATA_OBJ_SIZE;<br>
+               total = last_ext->data_len;<br>
+       } else {<br>
+               reserv_len = (last_ext->data_len - req->data_length);<br>
+               offset = last_ext->start * SD_DATA_OBJ_SIZE + reserv_len;<br>
+               total = req->data_length;<br>
+               if (last_ext->data_len > req->data_length)<br>
+                       create = false;<br>
+       }<br>
+<br>
+       ret = do_vdi_write(req, data_vid, offset, total, data_buf, create);<br>
+       if (ret != SD_RES_SUCCESS)<br>
+               sd_err("Failed to do_vdi_write data_vid: %" PRIx32<br>
+                      ", offset: %" PRIx64 ", total: %" PRIx64<br>
+                      ", ret: %s", data_vid, offset, total,<br>
+                      sd_strerror(ret));<br>
+out:<br>
        free(data_buf);<br>
        return ret;<br>
 }<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.9.1<br>
<br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regard<br>Robin Dong
</div>