[sheepdog] [PATCH 02/10] use 4KB for trim_zero_sectors instead of 512B

MORITA Kazutaka morita.kazutaka at gmail.com
Tue May 21 02:11:51 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

Most filesystems use a 4KB block size, and in that case trimming at
512B granularity may be meaningless.  It's better to use the block size
rather than the sector size, which also makes the comparison faster.

This also allows the caller to specify the offset of the buffer so
that the trimmed offset and length it gets back are block-aligned.
This is in preparation for allowing sparse objects.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/common.c     |    2 +-
 collie/vdi.c        |    8 +++----
 include/util.h      |    7 +++---
 lib/util.c          |   61 +++++++++++++++++++++++++++++++++++++--------------
 sheep/gateway.c     |    4 ++--
 sheep/ops.c         |   10 ++++-----
 sheep/plain_store.c |    2 +-
 sheep/store.c       |    2 +-
 8 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/collie/common.c b/collie/common.c
index 8dacbd4..9f4e001 100644
--- a/collie/common.c
+++ b/collie/common.c
@@ -64,7 +64,7 @@ int sd_read_object(uint64_t oid, void *data, unsigned int datalen,
 		return rsp->result;
 	}
 
-	untrim_zero_sectors(data, rsp->obj.offset, rsp->data_length, datalen);
+	untrim_zero_blocks(data, rsp->obj.offset, rsp->data_length, datalen);
 
 	return SD_RES_SUCCESS;
 }
diff --git a/collie/vdi.c b/collie/vdi.c
index 27a8418..fe999c0 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -379,8 +379,8 @@ static void parse_objs(uint64_t oid, obj_parser_func_t func, void *data, unsigne
 		snprintf(name + strlen(name), sizeof(name) - strlen(name),
 			 ":%d", sd_nodes[i].nid.port);
 
-		untrim_zero_sectors(buf, rsp->obj.offset,
-				    rsp->data_length, size);
+		untrim_zero_blocks(buf, rsp->obj.offset, rsp->data_length,
+				   size);
 		cb_ret = func(name, oid, rsp, buf, data);
 		if (cb_ret)
 			break;
@@ -1395,8 +1395,8 @@ static void *read_object_from(const struct sd_vnode *vnode, uint64_t oid)
 
 	switch (rsp->result) {
 	case SD_RES_SUCCESS:
-		untrim_zero_sectors(buf, rsp->obj.offset, rsp->data_length,
-				    size);
+		untrim_zero_blocks(buf, rsp->obj.offset, rsp->data_length,
+				   size);
 		break;
 	case SD_RES_NO_OBJ:
 		free(buf);
diff --git a/include/util.h b/include/util.h
index 8f92cf0..2fd64ff 100644
--- a/include/util.h
+++ b/include/util.h
@@ -14,6 +14,7 @@
 #include "list.h"
 
 #define SECTOR_SIZE (1U << 9)
+#define BLOCK_SIZE (1U << 12)
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
@@ -95,9 +96,9 @@ void reraise_crash_signal(int signo, int status);
 pid_t gettid(void);
 bool is_xattr_enabled(const char *path);
 
-void trim_zero_sectors(void *buf, uint64_t *offset, uint32_t *len);
-void untrim_zero_sectors(void *buf, uint64_t offset, uint32_t len,
-			 uint32_t requested_len);
+void trim_zero_blocks(void *buf, uint64_t *offset, uint32_t *len);
+void untrim_zero_blocks(void *buf, uint64_t offset, uint32_t len,
+			uint32_t requested_len);
 int atomic_create_and_write(const char *path, char *buf, size_t len);
 
 /* a type safe version of qsort() */
diff --git a/lib/util.c b/lib/util.c
index fac468a..2fbd2f7 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -356,36 +356,65 @@ int rmdir_r(char *dir_path)
 	return ret;
 }
 
-/* Trim zero sectors from the beginning and end of buffer */
-void trim_zero_sectors(void *buf, uint64_t *offset, uint32_t *len)
+/*
+ * Trim zero blocks from the beginning and end of buffer
+ *
+ * The caller passes the offset of 'buf' with 'poffset' so that this function
+ * can align the return values to BLOCK_SIZE.  'plen' points to the length of
+ * the buffer.  If there are zero blocks at the beginning of the buffer, this
+ * function increases the offset and decreases the length on condition that
+ * '*poffset' is block-aligned.  If there are zero blocks at the end of the
+ * buffer, this function also decreases the length on condition that '*plen' is
+ * block-aligned.
+ */
+void trim_zero_blocks(void *buf, uint64_t *poffset, uint32_t *plen)
 {
-	const uint8_t zero[SECTOR_SIZE] = {0};
+	const uint8_t zero[BLOCK_SIZE] = {0};
 	uint8_t *p = buf;
+	uint64_t start = *poffset;
+	uint64_t offset = 0;
+	uint32_t len = *plen;
 
-	assert(*offset == 0);
+	/* trim zero blocks from the beginning of buffer */
+	while (len >= BLOCK_SIZE) {
+		size_t size = (start + offset) % BLOCK_SIZE;
+		if (size == 0)
+			size = BLOCK_SIZE;
 
-	/* trim zero sectors from the beginning of buffer */
-	while (*len >= SECTOR_SIZE) {
-		if (memcmp(p + *offset, zero, SECTOR_SIZE) != 0)
+		if (memcmp(p + offset, zero, size) != 0)
 			break;
 
-		*offset += SECTOR_SIZE;
-		*len -= SECTOR_SIZE;
+		offset += size;
+		len -= size;
 	}
-	memmove(buf, p + *offset, *len);
+	if (offset > 0)
+		memmove(buf, p + offset, len);
 
 	/* trim zero sectors from the end of buffer */
-	while (*len >= SECTOR_SIZE) {
-		if (memcmp(p + *len - SECTOR_SIZE, zero, SECTOR_SIZE) != 0)
+	while (len >= BLOCK_SIZE) {
+		size_t size = (start + len) % BLOCK_SIZE;
+		if (size == 0)
+			size = BLOCK_SIZE;
+
+		if (memcmp(p + len - size, zero, size) != 0)
 			break;
 
-		*len -= SECTOR_SIZE;
+		len -= size;
 	}
+
+	*plen = len;
+	*poffset = start + offset;
 }
 
-/* Untrim zero sectors to the beginning and end of buffer */
-void untrim_zero_sectors(void *buf, uint64_t offset, uint32_t len,
-			 uint32_t requested_len)
+/*
+ * Untrim zero blocks to the beginning and end of buffer
+ *
+ * 'offset' is the offset of 'buf' in the original buffer, 'len' is the length
+ * of 'buf', and 'requested_len' is the length of the original buffer.  'buf'
+ * must have enough space to contain 'requested_len' bytes.
+ */
+void untrim_zero_blocks(void *buf, uint64_t offset, uint32_t len,
+			uint32_t requested_len)
 {
 	uint8_t *p = buf;
 
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 1a37ad8..5ee4229 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -89,8 +89,8 @@ out:
 	if (ret == SD_RES_SUCCESS &&
 	    req->rq.proto_ver < SD_PROTO_VER_TRIM_ZERO_SECTORS) {
 		/* the client doesn't support trimming zero bytes */
-		untrim_zero_sectors(req->data, req->rp.obj.offset,
-				    req->rp.data_length, req->rq.data_length);
+		untrim_zero_blocks(req->data, req->rp.obj.offset,
+				   req->rp.data_length, req->rq.data_length);
 		req->rp.data_length = req->rq.data_length;
 		req->rp.obj.offset = 0;
 	}
diff --git a/sheep/ops.c b/sheep/ops.c
index 6e1caa7..4489ad0 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -900,8 +900,8 @@ static int read_copy_from_replica(struct request *req, uint32_t epoch,
 	ret = gateway_read_obj(&read_req);
 
 	if (ret == SD_RES_SUCCESS)
-		untrim_zero_sectors(buf, rsp->obj.offset, rsp->data_length,
-				    SD_DATA_OBJ_SIZE);
+		untrim_zero_blocks(buf, rsp->obj.offset, rsp->data_length,
+				   SD_DATA_OBJ_SIZE);
 
 	return ret;
 }
@@ -937,7 +937,7 @@ int peer_read_obj(struct request *req)
 
 	rsp->data_length = hdr->data_length;
 	rsp->obj.offset = 0;
-	trim_zero_sectors(req->data, &rsp->obj.offset, &rsp->data_length);
+	trim_zero_blocks(req->data, &rsp->obj.offset, &rsp->data_length);
 
 	if (hdr->obj.copies)
 		rsp->obj.copies = hdr->obj.copies;
@@ -1002,8 +1002,8 @@ int peer_create_and_write_obj(struct request *req)
 		memcpy(&cow_hdr, hdr, sizeof(cow_hdr));
 		cow_hdr.data_length = SD_DATA_OBJ_SIZE;
 		cow_hdr.obj.offset = 0;
-		trim_zero_sectors(buf, &cow_hdr.obj.offset,
-				  &cow_hdr.data_length);
+		trim_zero_blocks(buf, &cow_hdr.obj.offset,
+				 &cow_hdr.data_length);
 
 		ret = do_create_and_write_obj(&iocb, &cow_hdr, epoch, buf);
 	} else
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 2e8e20c..02d0bd7 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -548,7 +548,7 @@ int default_get_hash(uint64_t oid, uint32_t epoch, uint8_t *sha1)
 		return ret;
 	}
 
-	trim_zero_sectors(buf, &offset, &length);
+	trim_zero_blocks(buf, &offset, &length);
 
 	sha1_init(&c);
 	sha1_update(&c, (uint8_t *)&offset, sizeof(offset));
diff --git a/sheep/store.c b/sheep/store.c
index 7b7fb18..56ba2b2 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -474,7 +474,7 @@ int read_backend_object(uint64_t oid, char *data, unsigned int datalen,
 		sd_eprintf("failed to read object %" PRIx64 ", %s", oid,
 			   sd_strerror(ret));
 
-	untrim_zero_sectors(data, rsp->obj.offset, rsp->data_length, datalen);
+	untrim_zero_blocks(data, rsp->obj.offset, rsp->data_length, datalen);
 
 	return ret;
 }
-- 
1.7.9.5




More information about the sheepdog mailing list