[sheepdog] [PATCH v2 2/3] sheep/http: add variable 'data_len' for o_extent[]

Robin Dong robin.k.dong at gmail.com
Wed Mar 19 05:44:24 CET 2014


From: Robin Dong <sanbai at taobao.com>

Since users can use the append operation to add 1MB of data each time, the
'start' and 'count' in o_extent[] are not enough to describe the exact
length of data stored in an o_extent (each o_extent may store only 1MB of
data, which is smaller than SD_DATA_OBJ_SIZE).

Therefore we add variable 'data_len' in o_extent[] to identify the
length of data.

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
 sheep/http/kv.c | 101 +++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 23 deletions(-)

diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index 9a653b4..6805f12 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -28,6 +28,7 @@ struct kv_bnode {
 struct onode_extent {
 	uint64_t start;
 	uint64_t count;
+	uint64_t data_len;
 };
 
 /*
@@ -624,7 +625,7 @@ out:
 #define KV_ONODE_INLINE_SIZE (SD_DATA_OBJ_SIZE - ONODE_HDR_SIZE)
 
 static int vdi_read_write(uint32_t vid, char *data, size_t length,
-			  off_t offset, bool is_read)
+			  off_t offset, bool is_read, bool create)
 {
 	struct sd_req hdr;
 	uint32_t idx = offset / SD_DATA_OBJ_SIZE;
@@ -643,7 +644,10 @@ static int vdi_read_write(uint32_t vid, char *data, size_t length,
 		if (is_read) {
 			sd_init_req(&hdr, SD_OP_READ_OBJ);
 		} else {
-			sd_init_req(&hdr, SD_OP_CREATE_AND_WRITE_OBJ);
+			if (create)
+				sd_init_req(&hdr, SD_OP_CREATE_AND_WRITE_OBJ);
+			else
+				sd_init_req(&hdr, SD_OP_WRITE_OBJ);
 			hdr.flags = SD_FLAG_CMD_WRITE;
 		}
 		hdr.data_length = len;
@@ -662,6 +666,7 @@ static int vdi_read_write(uint32_t vid, char *data, size_t length,
 		}
 		done += len;
 		data += len;
+		create = true;
 	}
 
 	return local_req_wait(iocb);
@@ -670,11 +675,22 @@ static int vdi_read_write(uint32_t vid, char *data, size_t length,
 static int onode_allocate_extents(struct kv_onode *onode,
 				  struct http_request *req)
 {
-	uint64_t start = 0, count;
-	int ret;
-	uint32_t data_vid = onode->data_vid, idx;
-
-	count = DIV_ROUND_UP(req->data_length, SD_DATA_OBJ_SIZE);
+	uint64_t start = 0, count, reserv_len = 0;
+	int ret = SD_RES_SUCCESS;
+	uint32_t data_vid = onode->data_vid, idx = onode->nr_extent;
+
+	/* if the previous o_extent[] has some extra space, use it */
+	if (idx) {
+		reserv_len = onode->o_extent[idx - 1].count * SD_DATA_OBJ_SIZE -
+			     onode->o_extent[idx - 1].data_len;
+		/*
+		 * if we can put whole request data into extra space of last
+		 * o_extent, we don't need to allocate a new extent.
+		 */
+		if (req->data_length <= reserv_len)
+			goto out;
+	}
+	count = DIV_ROUND_UP((req->data_length - reserv_len), SD_DATA_OBJ_SIZE);
 	sys->cdrv->lock(data_vid);
 	ret = oalloc_new_prepare(data_vid, &start, count);
 	sys->cdrv->unlock(data_vid);
@@ -693,9 +709,9 @@ static int onode_allocate_extents(struct kv_onode *onode,
 		goto out;
 	}
 
-	idx = onode->nr_extent;
 	onode->o_extent[idx].start = start;
 	onode->o_extent[idx].count = count;
+	onode->o_extent[idx].data_len = 0;
 	onode->nr_extent++;
 out:
 	return ret;
@@ -704,16 +720,40 @@ out:
 static int onode_populate_extents(struct kv_onode *onode,
 				  struct http_request *req)
 {
+	struct onode_extent *ext;
+	struct onode_extent *last_ext = onode->o_extent + onode->nr_extent - 1;
 	ssize_t size;
-	uint64_t start = onode->o_extent[onode->nr_extent - 1].start;
-	uint64_t done = 0, total, offset;
+	uint64_t done = 0, total, offset = 0, reserv_len;
 	uint64_t write_buffer_size = MIN(kv_rw_buffer, req->data_length);
 	int ret = SD_RES_SUCCESS;
 	char *data_buf = NULL;
 	uint32_t data_vid = onode->data_vid;
+	bool create = true;
 
 	data_buf = xmalloc(write_buffer_size);
-	offset = start * SD_DATA_OBJ_SIZE;
+	if (last_ext->data_len == 0 && onode->nr_extent == 1) {
+		offset = last_ext->start * SD_DATA_OBJ_SIZE +
+			 last_ext->data_len;
+		last_ext->data_len += req->data_length;
+	} else if (last_ext->data_len > 0) {
+		offset = last_ext->start * SD_DATA_OBJ_SIZE +
+			 last_ext->data_len;
+		last_ext->data_len += req->data_length;
+		create = false;
+	} else {
+		ext = last_ext - 1;
+		reserv_len = ext->count * SD_DATA_OBJ_SIZE - ext->data_len;
+		offset = ext->start * SD_DATA_OBJ_SIZE + ext->data_len;
+		ext->data_len += reserv_len;
+		last_ext->data_len = req->data_length - reserv_len;
+		/*
+		 * if the previous oid has extra space, we don't need
+		 * to use SD_OP_CREATE_AND_WRITE_OBJ on this oid.
+		 */
+		if (reserv_len > 0)
+			create = false;
+	}
+
 	total = req->data_length;
 	while (done < total) {
 		size = http_request_read(req, data_buf, write_buffer_size);
@@ -722,7 +762,10 @@ static int onode_populate_extents(struct kv_onode *onode,
 			ret = SD_RES_EIO;
 			goto out;
 		}
-		ret = vdi_read_write(data_vid, data_buf, size, offset, false);
+		ret = vdi_read_write(data_vid, data_buf, size, offset,
+				     false, create);
+		sd_debug("vdi_write size: %"PRIu64", offset: %"
+			 PRIu64", ret:%d", size, offset, ret);
 		if (ret != SD_RES_SUCCESS) {
 			sd_err("Failed to write data object for %s, %s",
 			       onode->name, sd_strerror(ret));
@@ -777,6 +820,23 @@ out:
 	return ret;
 }
 
+static int onode_do_update(struct kv_onode *onode)
+{
+	uint64_t len;
+	int ret;
+
+	if (onode->inlined)
+		len = onode->size;
+	else
+		len = sizeof(struct onode_extent) * onode->nr_extent;
+
+	ret = sd_write_object(onode->oid, (char *)onode, ONODE_HDR_SIZE + len,
+			      0, false);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("Failed to update object, %" PRIx64, onode->oid);
+	return ret;
+}
+
 static int onode_populate_data(struct kv_onode *onode, struct http_request *req)
 {
 	ssize_t size;
@@ -802,8 +862,7 @@ static int onode_populate_data(struct kv_onode *onode, struct http_request *req)
 		if (ret != SD_RES_SUCCESS)
 			goto out;
 		/* write mtime and flag ONODE_COMPLETE to onode */
-		ret = sd_write_object(onode->oid, (char *)onode,
-				      ONODE_HDR_SIZE, 0, false);
+		ret = onode_do_update(onode);
 		if (ret != SD_RES_SUCCESS) {
 			sd_err("Failed to write mtime and flags of onode %s",
 			       onode->name);
@@ -817,7 +876,6 @@ out:
 static int onode_populate_append_data(struct kv_onode *onode,
 				      struct http_request *req)
 {
-	uint64_t len;
 	int ret = SD_RES_SUCCESS;
 
 	onode->mtime = get_seconds();
@@ -825,12 +883,9 @@ static int onode_populate_append_data(struct kv_onode *onode,
 	ret = onode_populate_extents(onode, req);
 	if (ret != SD_RES_SUCCESS)
 		goto out;
-	len = sizeof(struct onode_extent) * onode->nr_extent;
-	ret = sd_write_object(onode->oid, (char *)onode,
-			      ONODE_HDR_SIZE + len, 0, false);
+	ret = onode_do_update(onode);
 	if (ret != SD_RES_SUCCESS) {
-		sd_err("Failed to write mtime and flags of onode %s",
-		       onode->name);
+		sd_err("Failed to write mtime of onode %s", onode->name);
 		goto out;
 	}
 out:
@@ -951,7 +1006,7 @@ static int onode_read_extents(struct kv_onode *onode, struct http_request *req)
 	total_size = len;
 	for (i = 0; i < onode->nr_extent; i++) {
 		ext = onode->o_extent + i;
-		ext_len = ext->count * SD_DATA_OBJ_SIZE;
+		ext_len = ext->data_len;
 		if (off >= ext_len) {
 			off -= ext_len;
 			continue;
@@ -963,8 +1018,8 @@ static int onode_read_extents(struct kv_onode *onode, struct http_request *req)
 		while (done < total) {
 			size = MIN(total - done, read_buffer_size);
 			ret = vdi_read_write(onode->data_vid, data_buf,
-					     size, offset, true);
-			sd_debug("vdi_read_write size: %"PRIu64", offset: %"
+					     size, offset, true, false);
+			sd_debug("vdi_read size: %"PRIu64", offset: %"
 				 PRIu64", ret:%d", size, offset, ret);
 			if (ret != SD_RES_SUCCESS) {
 				sd_err("Failed to read for vid %"PRIx32,
-- 
1.7.12.4




More information about the sheepdog mailing list