[sheepdog] [PATCH v3] sheep: check memory address alignment for direct IO

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Thu May 23 13:01:14 CEST 2013


Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---

v3:
 - rename get_open_flags() to prepare_iocb()
 - move alignment check into prepare_iocb()

v2:
 - move alignment check into get_open_flags()
 - add check for object cache IO

 sheep/object_cache.c |  8 ++++++--
 sheep/plain_store.c  | 38 +++++++++++++++-----------------------
 sheep/sheep_priv.h   |  5 +++++
 3 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 3aa7fd6..18f1198 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -288,8 +288,10 @@ static int read_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
 	snprintf(p, sizeof(p), "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
 		 vid, idx);
 
-	if (sys->object_cache_directio && !idx_has_vdi_bit(idx))
+	if (sys->object_cache_directio && !idx_has_vdi_bit(idx)) {
+		assert(is_aligned_to_pagesize(buf));
 		flags |= O_DIRECT;
+	}
 
 	fd = open(p, flags, sd_def_fmode);
 	if (fd < 0) {
@@ -322,8 +324,10 @@ static int write_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
 
 	snprintf(p, sizeof(p), "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
 		 vid, idx);
-	if (sys->object_cache_directio && !idx_has_vdi_bit(idx))
+	if (sys->object_cache_directio && !idx_has_vdi_bit(idx)) {
+		assert(is_aligned_to_pagesize(buf));
 		flags |= O_DIRECT;
+	}
 
 	fd = open(p, flags, sd_def_fmode);
 	if (fd < 0) {
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index f895530..9fb9ad1 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -20,7 +20,14 @@
 #include "config.h"
 #include "sha1.h"
 
-static int get_open_flags(uint64_t oid, bool create)
+#define sector_algined(x) ({ ((x) & (SECTOR_SIZE - 1)) == 0; })
+
+static inline bool iocb_is_aligned(const struct siocb *iocb)
+{
+	return  sector_algined(iocb->offset) && sector_algined(iocb->length);
+}
+
+static int prepare_iocb(uint64_t oid, const struct siocb *iocb, bool create)
 {
 	int flags = O_DSYNC | O_RDWR;
 
@@ -28,8 +35,10 @@ static int get_open_flags(uint64_t oid, bool create)
 		flags &= ~O_DSYNC;
 
 	/* We can not use DIO for inode object because it is not 512B aligned */
-	if (sys->backend_dio && is_data_obj(oid))
+	if (sys->backend_dio && is_data_obj(oid) && iocb_is_aligned(iocb)) {
+		assert(is_aligned_to_pagesize(iocb->buf));
 		flags |= O_DIRECT;
+	}
 
 	if (create)
 		flags |= O_CREAT | O_EXCL;
@@ -90,16 +99,9 @@ static int err_to_sderr(char *path, uint64_t oid, int err)
 	}
 }
 
-#define sector_algined(x) ({ ((x) & (SECTOR_SIZE - 1)) == 0; })
-
-static inline bool iocb_is_aligned(const struct siocb *iocb)
-{
-	return  sector_algined(iocb->offset) && sector_algined(iocb->length);
-}
-
 int default_write(uint64_t oid, const struct siocb *iocb)
 {
-	int flags = get_open_flags(oid, false), fd,
+	int flags = prepare_iocb(oid, iocb, false), fd,
 	    ret = SD_RES_SUCCESS;
 	char path[PATH_MAX];
 	ssize_t size;
@@ -109,9 +111,6 @@ int default_write(uint64_t oid, const struct siocb *iocb)
 		return SD_RES_OLD_NODE_VER;
 	}
 
-	if (flags & O_DIRECT && !iocb_is_aligned(iocb))
-		flags &= ~O_DIRECT;
-
 	if (uatomic_is_true(&sys->use_journal) &&
 	    journal_write_store(oid, iocb->buf, iocb->length, iocb->offset,
 				false)
@@ -183,8 +182,8 @@ int default_cleanup(void)
 static int init_vdi_state(uint64_t oid, char *wd)
 {
 	char path[PATH_MAX];
-	int fd, flags = get_open_flags(oid, false), ret;
 	struct sd_inode *inode = xzalloc(sizeof(*inode));
+	int fd, flags = O_RDONLY, ret;
 
 	snprintf(path, sizeof(path), "%s/%016"PRIx64, wd, oid);
 
@@ -243,13 +242,10 @@ int default_init(void)
 static int default_read_from_path(uint64_t oid, char *path,
 				  const struct siocb *iocb)
 {
-	int flags = get_open_flags(oid, false), fd,
+	int flags = prepare_iocb(oid, iocb, false), fd,
 	    ret = SD_RES_SUCCESS;
 	ssize_t size;
 
-	if (flags & O_DIRECT && !iocb_is_aligned(iocb))
-		flags &= ~O_DIRECT;
-
 	fd = open(path, flags);
 
 	if (fd < 0)
@@ -305,17 +301,13 @@ int prealloc(int fd, uint32_t size)
 int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 {
 	char path[PATH_MAX], tmp_path[PATH_MAX];
-	int flags = get_open_flags(oid, true);
+	int flags = prepare_iocb(oid, iocb, true);
 	int ret, fd;
 	uint32_t len = iocb->length;
 
 	get_obj_path(oid, path);
 	get_tmp_obj_path(oid, tmp_path);
 
-	if (flags & O_DIRECT && !iocb_is_aligned(iocb))
-		/* Drop the O_DIRECT for create operation for simplicity */
-		flags &= ~O_DIRECT;
-
 	if (uatomic_is_true(&sys->use_journal) &&
 	    journal_write_store(oid, iocb->buf, iocb->length,
 				iocb->offset, true)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ae5249d..f987238 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -209,6 +209,11 @@ static inline uint32_t sys_epoch(void)
 	return uatomic_read(&sys->epoch);
 }
 
+static inline bool is_aligned_to_pagesize(void *p)
+{
+	return ((uintptr_t)p & (getpagesize() - 1)) == 0;
+}
+
 int create_listen_port(char *bindaddr, int port);
 int init_unix_domain_socket(const char *dir);
 
-- 
1.8.1.3.566.gaa39828



More information about the sheepdog mailing list