[sheepdog] [PATCH v3] sheep: check memory address alignment for direct IO
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Thu May 23 13:01:14 CEST 2013
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
v3:
- rename get_open_flags() to prepare_iocb()
- move alignment check into prepare_iocb()
v2:
- move alignment check into get_open_flags()
- add check for object cache IO
sheep/object_cache.c | 8 ++++++--
sheep/plain_store.c | 38 +++++++++++++++-----------------------
sheep/sheep_priv.h | 5 +++++
3 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 3aa7fd6..18f1198 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -288,8 +288,10 @@ static int read_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
snprintf(p, sizeof(p), "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
vid, idx);
- if (sys->object_cache_directio && !idx_has_vdi_bit(idx))
+ if (sys->object_cache_directio && !idx_has_vdi_bit(idx)) {
+ assert(is_aligned_to_pagesize(buf));
flags |= O_DIRECT;
+ }
fd = open(p, flags, sd_def_fmode);
if (fd < 0) {
@@ -322,8 +324,10 @@ static int write_cache_object_noupdate(uint32_t vid, uint32_t idx, void *buf,
snprintf(p, sizeof(p), "%s/%06"PRIx32"/%08"PRIx32, object_cache_dir,
vid, idx);
- if (sys->object_cache_directio && !idx_has_vdi_bit(idx))
+ if (sys->object_cache_directio && !idx_has_vdi_bit(idx)) {
+ assert(is_aligned_to_pagesize(buf));
flags |= O_DIRECT;
+ }
fd = open(p, flags, sd_def_fmode);
if (fd < 0) {
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index f895530..9fb9ad1 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -20,7 +20,14 @@
#include "config.h"
#include "sha1.h"
-static int get_open_flags(uint64_t oid, bool create)
+#define sector_algined(x) ({ ((x) & (SECTOR_SIZE - 1)) == 0; })
+
+static inline bool iocb_is_aligned(const struct siocb *iocb)
+{
+ return sector_algined(iocb->offset) && sector_algined(iocb->length);
+}
+
+static int prepare_iocb(uint64_t oid, const struct siocb *iocb, bool create)
{
int flags = O_DSYNC | O_RDWR;
@@ -28,8 +35,10 @@ static int get_open_flags(uint64_t oid, bool create)
flags &= ~O_DSYNC;
/* We can not use DIO for inode object because it is not 512B aligned */
- if (sys->backend_dio && is_data_obj(oid))
+ if (sys->backend_dio && is_data_obj(oid) && iocb_is_aligned(iocb)) {
+ assert(is_aligned_to_pagesize(iocb->buf));
flags |= O_DIRECT;
+ }
if (create)
flags |= O_CREAT | O_EXCL;
@@ -90,16 +99,9 @@ static int err_to_sderr(char *path, uint64_t oid, int err)
}
}
-#define sector_algined(x) ({ ((x) & (SECTOR_SIZE - 1)) == 0; })
-
-static inline bool iocb_is_aligned(const struct siocb *iocb)
-{
- return sector_algined(iocb->offset) && sector_algined(iocb->length);
-}
-
int default_write(uint64_t oid, const struct siocb *iocb)
{
- int flags = get_open_flags(oid, false), fd,
+ int flags = prepare_iocb(oid, iocb, false), fd,
ret = SD_RES_SUCCESS;
char path[PATH_MAX];
ssize_t size;
@@ -109,9 +111,6 @@ int default_write(uint64_t oid, const struct siocb *iocb)
return SD_RES_OLD_NODE_VER;
}
- if (flags & O_DIRECT && !iocb_is_aligned(iocb))
- flags &= ~O_DIRECT;
-
if (uatomic_is_true(&sys->use_journal) &&
journal_write_store(oid, iocb->buf, iocb->length, iocb->offset,
false)
@@ -183,8 +182,8 @@ int default_cleanup(void)
static int init_vdi_state(uint64_t oid, char *wd)
{
char path[PATH_MAX];
- int fd, flags = get_open_flags(oid, false), ret;
struct sd_inode *inode = xzalloc(sizeof(*inode));
+ int fd, flags = O_RDONLY, ret;
snprintf(path, sizeof(path), "%s/%016"PRIx64, wd, oid);
@@ -243,13 +242,10 @@ int default_init(void)
static int default_read_from_path(uint64_t oid, char *path,
const struct siocb *iocb)
{
- int flags = get_open_flags(oid, false), fd,
+ int flags = prepare_iocb(oid, iocb, false), fd,
ret = SD_RES_SUCCESS;
ssize_t size;
- if (flags & O_DIRECT && !iocb_is_aligned(iocb))
- flags &= ~O_DIRECT;
-
fd = open(path, flags);
if (fd < 0)
@@ -305,17 +301,13 @@ int prealloc(int fd, uint32_t size)
int default_create_and_write(uint64_t oid, const struct siocb *iocb)
{
char path[PATH_MAX], tmp_path[PATH_MAX];
- int flags = get_open_flags(oid, true);
+ int flags = prepare_iocb(oid, iocb, true);
int ret, fd;
uint32_t len = iocb->length;
get_obj_path(oid, path);
get_tmp_obj_path(oid, tmp_path);
- if (flags & O_DIRECT && !iocb_is_aligned(iocb))
- /* Drop the O_DIRECT for create operation for simplicity */
- flags &= ~O_DIRECT;
-
if (uatomic_is_true(&sys->use_journal) &&
journal_write_store(oid, iocb->buf, iocb->length,
iocb->offset, true)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ae5249d..f987238 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -209,6 +209,11 @@ static inline uint32_t sys_epoch(void)
return uatomic_read(&sys->epoch);
}
+static inline bool is_aligned_to_pagesize(void *p)
+{
+ return ((uintptr_t)p & (getpagesize() - 1)) == 0;
+}
+
int create_listen_port(char *bindaddr, int port);
int init_unix_domain_socket(const char *dir);
--
1.8.1.3.566.gaa39828
More information about the sheepdog
mailing list