<div dir="ltr">This patch has been commited to sheep/http, master branch</div><div class="gmail_extra"><br><div class="gmail_quote">2014-09-19 14:30 GMT+08:00 Robin Dong <span dir="ltr"><<a href="mailto:robin.k.dong@gmail.com" target="_blank">robin.k.dong@gmail.com</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>><br>
<br>
The 'append' opreation in sheepdog contains:<br>
<br>
1. lock(data_vid)<br>
2. read onode<br>
3. allocate space and set o_extent.data_len to zero<br>
4. write onode down<br>
5. unlock(data_vid)<br>
6. read uploading data and set o_extent.data_len in terms of req->data_length<br>
7. write onode down<br>
<br>
We unlock(data_vid) too early to protect the updating of 'o_extent.data_len'<br>
so one process may overwrite the space allocated by another process.<br>
To solve this problem, we move set operation of 'data_len' into lock region<br>
and modify the code to write data_vid.<br>
<br>
Signed-off-by: Robin Dong <<a href="mailto:sanbai@taobao.com">sanbai@taobao.com</a>><br>
---<br>
v2-->v1:<br>
1. remove faulty modification of md.c<br>
<br>
sheep/http/kv.c | 106 ++++++++++++++++++++++++++++++++++----------------------<br>
1 file changed, 64 insertions(+), 42 deletions(-)<br>
<br>
diff --git a/sheep/http/kv.c b/sheep/http/kv.c<br>
index 04ee462..4611e05 100644<br>
--- a/sheep/http/kv.c<br>
+++ b/sheep/http/kv.c<br>
@@ -727,8 +727,11 @@ static int onode_allocate_extents(struct kv_onode *onode,<br>
* if we can put whole request data into extra space of last<br>
* o_extent, it don't need to allocate new extent.<br>
*/<br>
- if (req->data_length <= reserv_len)<br>
+ if (req->data_length <= reserv_len) {<br>
+ onode->o_extent[idx - 1].data_len += req->data_length;<br>
goto out;<br>
+ } else<br>
+ onode->o_extent[idx - 1].data_len += reserv_len;<br>
}<br>
count = DIV_ROUND_UP((req->data_length - reserv_len), SD_DATA_OBJ_SIZE);<br>
sys->cdrv->lock(data_vid);<br>
@@ -751,52 +754,22 @@ static int onode_allocate_extents(struct kv_onode *onode,<br>
<br>
onode->o_extent[idx].start = start;<br>
onode->o_extent[idx].count = count;<br>
- onode->o_extent[idx].data_len = 0;<br>
+ onode->o_extent[idx].data_len = req->data_length - reserv_len;<br>
onode->nr_extent++;<br>
out:<br>
return ret;<br>
}<br>
<br>
-static int onode_populate_extents(struct kv_onode *onode,<br>
- struct http_request *req)<br>
+static int do_vdi_write(struct http_request *req, uint32_t data_vid,<br>
+ uint64_t offset, uint64_t total, char *data_buf,<br>
+ bool create)<br>
{<br>
- struct onode_extent *ext;<br>
- struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;<br>
- ssize_t size;<br>
- uint64_t done = 0, total, offset = 0, reserv_len;<br>
- uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);<br>
+ uint64_t done = 0, size;<br>
int ret = SD_RES_SUCCESS;<br>
- char *data_buf = NULL;<br>
- uint32_t data_vid = onode->data_vid;<br>
- bool create = true;<br>
-<br>
- data_buf = xmalloc(write_buffer_size);<br>
- if (last_ext->data_len == 0 && onode->nr_extent == 1) {<br>
- offset = last_ext->start * SD_DATA_OBJ_SIZE +<br>
- last_ext->data_len;<br>
- last_ext->data_len += req->data_length;<br>
- } else if (last_ext->data_len > 0) {<br>
- offset = last_ext->start * SD_DATA_OBJ_SIZE +<br>
- last_ext->data_len;<br>
- last_ext->data_len += req->data_length;<br>
- create = false;<br>
- } else {<br>
- ext = last_ext - 1;<br>
- reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len;<br>
- offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len;<br>
- ext->data_len += reserv_len;<br>
- last_ext->data_len = req->data_length - reserv_len;<br>
- /*<br>
- * if the previous oid has extra space, we don't need<br>
- * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid.<br>
- */<br>
- if (reserv_len > 0)<br>
- create = false;<br>
- }<br>
<br>
- total = req->data_length;<br>
while (done < total) {<br>
- size = http_request_read(req, data_buf, write_buffer_size);<br>
+ size = http_request_read(req, data_buf,<br>
+ MIN(kv_rw_buffer, total - done));<br>
if (size <= 0) {<br>
sd_err("Failed to read http request: %ld", size);<br>
ret = SD_RES_EIO;<br>
@@ -804,17 +777,66 @@ static int onode_populate_extents(struct kv_onode *onode,<br>
}<br>
ret = vdi_read_write(data_vid, data_buf, size, offset,<br>
false, create);<br>
- sd_debug("vdi_write size: %"PRIu64", offset: %"<br>
- PRIu64", ret:%d", size, offset, ret);<br>
+ sd_debug("vdi_write offset: %"PRIu64", size: %" PRIu64<br>
+ ", for %" PRIx32 "ret: %d", offset, size,<br>
+ data_vid, ret);<br>
if (ret != SD_RES_SUCCESS) {<br>
- sd_err("Failed to write data object for %s, %s",<br>
- onode->name, sd_strerror(ret));<br>
+ sd_err("Failed to write data object for %" PRIx32<br>
+ ", %s", data_vid, sd_strerror(ret));<br>
goto out;<br>
}<br>
done += size;<br>
offset += size;<br>
}<br>
out:<br>
+ return ret;<br>
+}<br>
+<br>
+static int onode_populate_extents(struct kv_onode *onode,<br>
+ struct http_request *req)<br>
+{<br>
+ struct onode_extent *ext;<br>
+ struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;<br>
+ uint64_t total, offset = 0, reserv_len;<br>
+ uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);<br>
+ int ret = SD_RES_SUCCESS;<br>
+ char *data_buf = NULL;<br>
+ uint32_t data_vid = onode->data_vid;<br>
+ bool create = true;<br>
+<br>
+ data_buf = xmalloc(write_buffer_size);<br>
+<br>
+ if (last_ext->data_len < req->data_length) {<br>
+ ext = last_ext - 1;<br>
+ reserv_len = (req->data_length - last_ext->data_len);<br>
+ offset = (ext->start + ext->count) * SD_DATA_OBJ_SIZE -<br>
+ reserv_len;<br>
+ ret = do_vdi_write(req, data_vid, offset, reserv_len,<br>
+ data_buf, false);<br>
+ if (ret != SD_RES_SUCCESS) {<br>
+ sd_err("Failed to do_vdi_write data_vid: %" PRIx32<br>
+ ", offset: %" PRIx64 ", total: %" PRIx64<br>
+ ", ret: %s", data_vid, offset, reserv_len,<br>
+ sd_strerror(ret));<br>
+ goto out;<br>
+ }<br>
+ offset = last_ext->start * SD_DATA_OBJ_SIZE;<br>
+ total = last_ext->data_len;<br>
+ } else {<br>
+ reserv_len = (last_ext->data_len - req->data_length);<br>
+ offset = last_ext->start * SD_DATA_OBJ_SIZE + reserv_len;<br>
+ total = req->data_length;<br>
+ if (last_ext->data_len > req->data_length)<br>
+ create = false;<br>
+ }<br>
+<br>
+ ret = do_vdi_write(req, data_vid, offset, total, data_buf, create);<br>
+ if (ret != SD_RES_SUCCESS)<br>
+ sd_err("Failed to do_vdi_write data_vid: %" PRIx32<br>
+ ", offset: %" PRIx64 ", total: %" PRIx64<br>
+ ", ret: %s", data_vid, offset, total,<br>
+ sd_strerror(ret));<br>
+out:<br>
free(data_buf);<br>
return ret;<br>
}<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.9.1<br>
<br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regard<br>Robin Dong
</div>