[sheepdog] [PATCH v4 2/5] plain store: add support for non-aligned read/write

Liu Yuan namei.unix at gmail.com
Thu May 23 05:27:22 CEST 2013


For writes, we do a read/modify/write cycle; for reads, we extend the request
to a sector-aligned one and copy only the requested length into the buffer.

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 sheep/plain_store.c |  100 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 93 insertions(+), 7 deletions(-)

diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index b932d6a..e2680c7 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -90,6 +90,59 @@ static int err_to_sderr(char *path, uint64_t oid, int err)
 	}
 }
 
+#define sector_algined(x) ({ ((x) & (SECTOR_SIZE - 1)) == 0; })
+/* An iocb is aligned iff its offset and length are both sector multiples. */
+static inline bool iocb_is_aligned(const struct siocb *iocb)
+{
+	return sector_algined(iocb->offset | iocb->length);
+}
+
+/*
+ * Emulate an unaligned O_DIRECT write: read the partial edge sectors into
+ * an aligned bounce buffer, merge the caller's data, write the span back.
+ */
+static inline int do_aligned_write(uint64_t oid, const struct siocb *iocb)
+{
+	struct siocb new = { .epoch = iocb->epoch };
+	uint64_t end = iocb->offset + iocb->length;
+	uint64_t new_off = round_down(iocb->offset, SECTOR_SIZE);
+	uint32_t new_len = round_up(end, SECTOR_SIZE) - new_off;
+	int ret = SD_RES_SUCCESS;
+
+	sd_dprintf("new %"PRIu64 ", %"PRIu32 ", old %"PRIu64 ", %"PRIu32,
+		   new_off, new_len, iocb->offset, iocb->length);
+	new.buf = xvalloc(new_len);
+	/*
+	 * Read the tail sector first (default_read() fills new.buf from its
+	 * start), then memmove() it to the end (src == dst for one sector).
+	 */
+	if (!sector_algined(end)) {
+		new.offset = round_down(end, SECTOR_SIZE);
+		new.length = SECTOR_SIZE;
+		ret = default_read(oid, &new);
+		if (ret != SD_RES_SUCCESS)
+			goto out;
+		memmove((char *)new.buf + new_len - SECTOR_SIZE, new.buf,
+			SECTOR_SIZE);
+	}
+	if ((new_len > SECTOR_SIZE || sector_algined(end)) &&
+	    !sector_algined(iocb->offset)) {
+		new.offset = new_off;
+		new.length = SECTOR_SIZE;
+		ret = default_read(oid, &new);
+		if (ret != SD_RES_SUCCESS)
+			goto out;
+	}
+	memcpy((char *)new.buf + iocb->offset - new_off, iocb->buf,
+	       iocb->length);
+	new.offset = new_off;
+	new.length = new_len;
+	ret = default_write(oid, &new);
+out:
+	free(new.buf);
+	return ret;
+}
+
 int default_write(uint64_t oid, const struct siocb *iocb)
 {
 	int flags = get_open_flags(oid, false), fd,
@@ -102,7 +155,8 @@ int default_write(uint64_t oid, const struct siocb *iocb)
 		return SD_RES_OLD_NODE_VER;
 	}
 
-	get_obj_path(oid, path);
+	if (flags & O_DIRECT && !iocb_is_aligned(iocb))
+		return do_aligned_write(oid, iocb);
 
 	if (uatomic_is_true(&sys->use_journal) &&
 	    journal_write_store(oid, iocb->buf, iocb->length, iocb->offset,
@@ -114,6 +168,8 @@ int default_write(uint64_t oid, const struct siocb *iocb)
 		sync();
 	}
 
+	get_obj_path(oid, path);
+
 	fd = open(path, flags, sd_def_fmode);
 	if (fd < 0)
 		return err_to_sderr(path, oid, errno);
@@ -121,8 +177,8 @@ int default_write(uint64_t oid, const struct siocb *iocb)
 	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
 	if (size != iocb->length) {
 		sd_eprintf("failed to write object %"PRIx64", path=%s, offset=%"
-			PRId64", size=%"PRId32", result=%zd, %m", oid, path,
-			iocb->offset, iocb->length, size);
+			   PRId64", size=%"PRId32", result=%zd, %m", oid, path,
+			   iocb->offset, iocb->length, size);
 		ret = err_to_sderr(path, oid, errno);
 		goto out;
 	}
@@ -231,12 +287,40 @@ int default_init(void)
 }
 
 static int default_read_from_path(uint64_t oid, char *path,
+				  const struct siocb *iocb);
+
+/*
+ * Serve an unaligned read: read the enclosing sector-aligned span into a
+ * bounce buffer, then copy only the requested bytes to the caller's buffer.
+ */
+static int do_aligned_read(uint64_t oid, char *path, const struct siocb *iocb)
+{
+	uint64_t start = round_down(iocb->offset, SECTOR_SIZE);
+	uint64_t end = round_up(iocb->offset + iocb->length, SECTOR_SIZE);
+	struct siocb new = { .offset = start, .length = end - start };
+	int ret;
+
+	sd_dprintf("new %"PRIu64 ", %"PRIu32 ", old %"PRIu64 ", %"PRIu32,
+		   new.offset, new.length, iocb->offset, iocb->length);
+	new.buf = xvalloc(new.length);
+	ret = default_read_from_path(oid, path, &new);
+	if (ret == SD_RES_SUCCESS)
+		memcpy(iocb->buf, (char *)new.buf + iocb->offset - start,
+		       iocb->length);
+	free(new.buf);
+	return ret;
+}
+
+static int default_read_from_path(uint64_t oid, char *path,
 				  const struct siocb *iocb)
 {
 	int flags = get_open_flags(oid, false), fd,
 	    ret = SD_RES_SUCCESS;
 	ssize_t size;
 
+	if (flags & O_DIRECT && !iocb_is_aligned(iocb))
+		return do_aligned_read(oid, path, iocb);
+
 	fd = open(path, flags);
 
 	if (fd < 0)
@@ -245,13 +329,11 @@ static int default_read_from_path(uint64_t oid, char *path,
 	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
 	if (size != iocb->length) {
 		sd_eprintf("failed to read object %"PRIx64", path=%s, offset=%"
-			PRId64", size=%"PRId32", result=%zd, %m", oid, path,
-			iocb->offset, iocb->length, size);
+			   PRId64", size=%"PRId32", result=%zd, %m", oid, path,
+			   iocb->offset, iocb->length, size);
 		ret = err_to_sderr(path, oid, errno);
 	}
-
 	close(fd);
-
 	return ret;
 }
 
@@ -301,6 +383,10 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 	get_obj_path(oid, path);
 	get_tmp_obj_path(oid, tmp_path);
 
+	if (flags & O_DIRECT && !iocb_is_aligned(iocb))
+		/* Drop the O_DIRECT for create operation for simplicity */
+		flags &= ~O_DIRECT;
+
 	if (uatomic_is_true(&sys->use_journal) &&
 	    journal_write_store(oid, iocb->buf, iocb->length,
 				iocb->offset, true)
-- 
1.7.9.5




More information about the sheepdog mailing list