For write, we do a read/modify/write process and for read, we just extend the read as aligned one and copy the exact length to feed the buffer Signed-off-by: Liu Yuan <namei.unix at gmail.com> --- sheep/plain_store.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 92 insertions(+), 6 deletions(-) diff --git a/sheep/plain_store.c b/sheep/plain_store.c index b932d6a..916bc9a 100644 --- a/sheep/plain_store.c +++ b/sheep/plain_store.c @@ -90,6 +90,59 @@ static int err_to_sderr(char *path, uint64_t oid, int err) } } +#define sector_algined(x) ({ ((x) & (SECTOR_SIZE - 1)) == 0; }) + /* sector_algined(x) tests x against the mask SECTOR_SIZE - 1, i.e. whether x is a multiple of SECTOR_SIZE (presumably a power of two -- confirm); iocb_is_aligned() requires both the offset and the length of the request to pass that test. */ +static inline bool iocb_is_aligned(const struct siocb *iocb) +{ + return sector_algined(iocb->offset) && sector_algined(iocb->length); +} + /* Write an unaligned request by read-modify-write. A temporary buffer is allocated for the sector-aligned range [round_down(offset), round_up(offset + length)); the partially covered last and/or first sector is read into it with default_read(), the caller data is copied over its original position, and the widened range is passed to default_write(). Returns the first non-success code from default_read(), otherwise the result of default_write(); the temporary buffer is freed on every path. */ +static inline int do_aligned_write(uint64_t oid, const struct siocb *iocb) +{ + struct siocb new = { + .epoch = iocb->epoch, + }; + uint32_t new_len = round_up(iocb->offset + iocb->length, SECTOR_SIZE) - + round_down(iocb->offset, SECTOR_SIZE); + uint64_t new_off = round_down(iocb->offset, SECTOR_SIZE); + int ret = SD_RES_SUCCESS; + + sd_dprintf("new %"PRIu64 ", %"PRIu32 ", old %"PRIu64 ", %"PRIu32, + new_off, new_len, iocb->offset, iocb->length); + new.buf = xvalloc(new_len); + /* + * Read the last sector first: default_read() fills the buffer from its + * beginning, so the sector is read there and then copied to the tail + * of the buffer, leaving the head free for the first sector. + * NOTE(review): when new_len == SECTOR_SIZE that copy has identical + * source and destination, which memcpy() does not allow -- confirm. 
+ */ + if (!sector_algined(iocb->offset + iocb->length)) { + new.offset = round_down(iocb->offset + iocb->length, + SECTOR_SIZE); + new.length = SECTOR_SIZE; + ret = default_read(oid, &new); + if (ret != SD_RES_SUCCESS) + goto out; + memcpy((char *)new.buf + new_len - SECTOR_SIZE, new.buf, + SECTOR_SIZE); + } /* Read the first sector when the start is unaligned, unless the whole request lies in the single sector already read above. */ + if ((new_len > SECTOR_SIZE || + sector_algined(iocb->offset + iocb->length)) && + !sector_algined(iocb->offset)) { + new.offset = new_off; + new.length = SECTOR_SIZE; + ret = default_read(oid, &new); + if (ret != SD_RES_SUCCESS) + goto out; + } /* Overlay the caller data at its offset within the widened buffer, then write the whole aligned range. */ + memcpy((char *)new.buf + iocb->offset - new_off, iocb->buf, + iocb->length); + new.offset = new_off; + new.length = new_len; + ret = default_write(oid, &new); +out: + free(new.buf); + return ret; +} + int default_write(uint64_t oid, const struct siocb *iocb) { int flags = get_open_flags(oid, false), fd, @@ -114,6 +167,9 @@ int default_write(uint64_t oid, const struct siocb *iocb) sync(); } + if (flags & O_DIRECT && !iocb_is_aligned(iocb)) + return do_aligned_write(oid, iocb); + fd = open(path, flags, sd_def_fmode); if (fd < 0) return err_to_sderr(path, oid, errno); @@ -121,8 +177,8 @@ int default_write(uint64_t oid, const struct siocb *iocb) size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset); if (size != iocb->length) { sd_eprintf("failed to write object %"PRIx64", path=%s, offset=%" - PRId64", size=%"PRId32", result=%zd, %m", oid, path, - iocb->offset, iocb->length, size); + PRId64", size=%"PRId32", result=%zd, %m", oid, path, + iocb->offset, iocb->length, size); ret = err_to_sderr(path, oid, errno); goto out; } @@ -231,12 +287,40 @@ int default_init(void) } static int default_read_from_path(uint64_t oid, char *path, + const struct siocb *iocb); + /* Serve an unaligned read: read the enclosing sector-aligned range through default_read_from_path() into a temporary buffer, then copy iocb->length bytes starting at the original offset into iocb->buf. Returns the read result; the temporary buffer is freed on every path. */ +static int do_aligned_read(uint64_t oid, char *path, const struct siocb *iocb) +{ + struct siocb new = { + .offset = round_down(iocb->offset, SECTOR_SIZE), + .length = round_up(iocb->offset + iocb->length, SECTOR_SIZE) - + round_down(iocb->offset, SECTOR_SIZE) 
+ }; + int ret; + + sd_dprintf("new %"PRIu64 ", %"PRIu32 ", old %"PRIu64 ", %"PRIu32, + new.offset, new.length, iocb->offset, iocb->length); + new.buf = xvalloc(new.length); + ret = default_read_from_path(oid, path, &new); + if (ret != SD_RES_SUCCESS) + goto out; + memcpy(iocb->buf, (char *)new.buf + iocb->offset - new.offset, + iocb->length); +out: + free(new.buf); + return ret; +} + +static int default_read_from_path(uint64_t oid, char *path, const struct siocb *iocb) { int flags = get_open_flags(oid, false), fd, ret = SD_RES_SUCCESS; ssize_t size; + if (flags & O_DIRECT && !iocb_is_aligned(iocb)) + return do_aligned_read(oid, path, iocb); + fd = open(path, flags); if (fd < 0) @@ -245,13 +329,11 @@ static int default_read_from_path(uint64_t oid, char *path, size = xpread(fd, iocb->buf, iocb->length, iocb->offset); if (size != iocb->length) { sd_eprintf("failed to read object %"PRIx64", path=%s, offset=%" - PRId64", size=%"PRId32", result=%zd, %m", oid, path, - iocb->offset, iocb->length, size); + PRId64", size=%"PRId32", result=%zd, %m", oid, path, + iocb->offset, iocb->length, size); ret = err_to_sderr(path, oid, errno); } - close(fd); - return ret; } @@ -301,6 +383,10 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb) get_obj_path(oid, path); get_tmp_obj_path(oid, tmp_path); + if (flags & O_DIRECT && !iocb_is_aligned(iocb)) + /* Drop O_DIRECT for an unaligned create request, for simplicity */ + flags &= ~O_DIRECT; + if (uatomic_is_true(&sys->use_journal) && journal_write_store(oid, iocb->buf, iocb->length, iocb->offset, true) -- 1.7.9.5 |