[sheepdog] [PATCH 2/4] plain store: add support for non-aligned read/write
Liu Yuan
namei.unix at gmail.com
Wed May 22 10:36:05 CEST 2013
For a non-aligned write, we do a read/modify/write cycle; for a non-aligned read, we
extend the request to an aligned one and copy exactly the requested length into the caller's buffer.
Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
sheep/plain_store.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 76 insertions(+), 6 deletions(-)
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index b932d6a..846d9b2 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -37,6 +37,11 @@ static int get_open_flags(uint64_t oid, bool create)
return flags;
}
+/* Report whether the open(2) flag set @flags has O_DIRECT turned on. */
+static inline bool flags_direct(int flags)
+{
+	return (flags & O_DIRECT) != 0;
+}
+
static int get_obj_path(uint64_t oid, char *path)
{
return snprintf(path, PATH_MAX, "%s/%016" PRIx64,
@@ -90,6 +95,38 @@ static int err_to_sderr(char *path, uint64_t oid, int err)
}
}
+/*
+ * True when @x is a whole multiple of SECTOR_SIZE.
+ * NOTE(review): the name is a misspelling of "sector_aligned"; it is kept
+ * unchanged here so that any other user of the macro keeps compiling.
+ */
+#define sector_algined(x) (((x) % SECTOR_SIZE) == 0)
+
+/* A request counts as aligned only if both its offset and its length are. */
+static inline bool is_aligned(const struct siocb *iocb)
+{
+	if (!sector_algined(iocb->offset))
+		return false;
+
+	return sector_algined(iocb->length);
+}
+
+/*
+ * Carry out a write whose offset or length is not a multiple of SECTOR_SIZE
+ * as a read/modify/write: read the enclosing sector-aligned range into a
+ * scratch buffer, overlay the caller's bytes at the right position, and
+ * write the whole aligned range back.
+ *
+ * Returns whatever default_read() reports if the read fails, otherwise the
+ * result of default_write() on the widened request.
+ */
+static inline int do_aligned_write(uint64_t oid, const struct siocb *iocb)
+{
+	struct siocb aligned = {
+		.offset = round_down(iocb->offset, SECTOR_SIZE),
+		.length = round_up(iocb->offset + iocb->length, SECTOR_SIZE) -
+			  round_down(iocb->offset, SECTOR_SIZE),
+		.epoch = iocb->epoch,
+	};
+	int ret;
+
+	sd_dprintf("new %"PRIu64 ", %"PRIu32 ", old %"PRIu64 ", %"PRIu32,
+		   aligned.offset, aligned.length, iocb->offset, iocb->length);
+	aligned.buf = xvalloc(aligned.length);
+
+	/* Fetch the current contents so bytes outside the request survive. */
+	ret = default_read(oid, &aligned);
+	if (ret == SD_RES_SUCCESS) {
+		/* Caller's data starts this far into the aligned buffer. */
+		memcpy((char *)aligned.buf + (iocb->offset - aligned.offset),
+		       (char *)iocb->buf, iocb->length);
+		ret = default_write(oid, &aligned);
+	}
+
+	free(aligned.buf);
+	return ret;
+}
+
int default_write(uint64_t oid, const struct siocb *iocb)
{
int flags = get_open_flags(oid, false), fd,
@@ -114,6 +151,9 @@ int default_write(uint64_t oid, const struct siocb *iocb)
sync();
}
+ if (flags_direct(flags) && !is_aligned(iocb))
+ return do_aligned_write(oid, iocb);
+
fd = open(path, flags, sd_def_fmode);
if (fd < 0)
return err_to_sderr(path, oid, errno);
@@ -121,8 +161,8 @@ int default_write(uint64_t oid, const struct siocb *iocb)
size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
if (size != iocb->length) {
sd_eprintf("failed to write object %"PRIx64", path=%s, offset=%"
- PRId64", size=%"PRId32", result=%zd, %m", oid, path,
- iocb->offset, iocb->length, size);
+ PRId64", size=%"PRId32", result=%zd, %m", oid, path,
+ iocb->offset, iocb->length, size);
ret = err_to_sderr(path, oid, errno);
goto out;
}
@@ -231,12 +271,40 @@ int default_init(void)
}
static int default_read_from_path(uint64_t oid, char *path,
+ const struct siocb *iocb);
+
+/*
+ * Carry out a read whose offset or length is not a multiple of SECTOR_SIZE:
+ * read the enclosing sector-aligned range into a scratch buffer, then copy
+ * only the bytes the caller asked for into iocb->buf.
+ *
+ * Returns the result of default_read_from_path() on the widened request;
+ * iocb->buf is filled in only when that result is SD_RES_SUCCESS.
+ */
+static int do_aligned_read(uint64_t oid, char *path, const struct siocb *iocb)
+{
+	struct siocb new = {
+		.offset = round_down(iocb->offset, SECTOR_SIZE),
+		.length = round_up(iocb->offset + iocb->length, SECTOR_SIZE) -
+			  round_down(iocb->offset, SECTOR_SIZE)
+	};
+	int ret;
+
+	/* Separate offset and length so the two numbers do not run together. */
+	sd_dprintf("new %"PRIu64 ", %"PRIu32 ", old %"PRIu64 ", %"PRIu32,
+		   new.offset, new.length, iocb->offset, iocb->length);
+	new.buf = xvalloc(new.length);
+	ret = default_read_from_path(oid, path, &new);
+	if (ret != SD_RES_SUCCESS)
+		goto out;
+	/* Requested data starts (iocb->offset - new.offset) bytes in. */
+	memcpy((char *)iocb->buf,
+	       (char *)new.buf + (iocb->offset - new.offset),
+	       iocb->length);
+out:
+	free(new.buf);
+	return ret;
+}
+
+static int default_read_from_path(uint64_t oid, char *path,
const struct siocb *iocb)
{
int flags = get_open_flags(oid, false), fd,
ret = SD_RES_SUCCESS;
ssize_t size;
+ if (flags_direct(flags) && !is_aligned(iocb))
+ return do_aligned_read(oid, path, iocb);
+
fd = open(path, flags);
if (fd < 0)
@@ -245,13 +313,11 @@ static int default_read_from_path(uint64_t oid, char *path,
size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
if (size != iocb->length) {
sd_eprintf("failed to read object %"PRIx64", path=%s, offset=%"
- PRId64", size=%"PRId32", result=%zd, %m", oid, path,
- iocb->offset, iocb->length, size);
+ PRId64", size=%"PRId32", result=%zd, %m", oid, path,
+ iocb->offset, iocb->length, size);
ret = err_to_sderr(path, oid, errno);
}
-
close(fd);
-
return ret;
}
@@ -301,6 +367,10 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
get_obj_path(oid, path);
get_tmp_obj_path(oid, tmp_path);
+ if (flags_direct(flags) && !is_aligned(iocb))
+ /* Drop O_DIRECT for the create operation, for simplicity */
+ flags &= ~O_DIRECT;
+
if (uatomic_is_true(&sys->use_journal) &&
journal_write_store(oid, iocb->buf, iocb->length,
iocb->offset, true)
--
1.7.9.5
More information about the sheepdog
mailing list