From: Liu Yuan <tailai.ly at taobao.com> When sheep is launched with the '-D' or '--directio' option, we will use direct IO for the object cache too. - By default, the object cache uses both the host page cache and the disk write-back cache (if any); this gives the best performance, but it is greedy and uses as much host memory as possible. Signed-off-by: Liu Yuan <tailai.ly at taobao.com> --- sheep/object_cache.c | 20 +++++++++++++------- sheep/sheep.c | 2 +- sheep/store.c | 28 +++++++--------------------- 3 files changed, 21 insertions(+), 29 deletions(-) diff --git a/sheep/object_cache.c b/sheep/object_cache.c index ef2c05c..389dc6d 100644 --- a/sheep/object_cache.c +++ b/sheep/object_cache.c @@ -30,7 +30,7 @@ #define HASH_SIZE (1 << HASH_BITS) static char cache_dir[PATH_MAX]; -static int def_open_flags = O_DSYNC | O_RDWR; +static int def_open_flags = O_RDWR; extern mode_t def_fmode; extern mode_t def_dmode; extern struct store_driver *sd_store; @@ -192,14 +192,17 @@ out: static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset) { size_t size; - int fd, ret = SD_RES_SUCCESS; + int fd, flags = def_open_flags, ret = SD_RES_SUCCESS; struct strbuf p; strbuf_init(&p, PATH_MAX); strbuf_addstr(&p, cache_dir); strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx); - fd = open(p.buf, def_open_flags, def_fmode); + if (sys->use_directio && !(idx & CACHE_VDI_BIT)) + flags |= O_DIRECT; + + fd = open(p.buf, flags, def_fmode); size = xpwrite(fd, buf, count, offset); if (size != count) ret = SD_RES_EIO; @@ -211,14 +214,17 @@ static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t coun static int read_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset) { size_t size; - int fd, ret = SD_RES_SUCCESS; + int fd, flags = def_open_flags, ret = SD_RES_SUCCESS; struct strbuf p; strbuf_init(&p, PATH_MAX); strbuf_addstr(&p, cache_dir); strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx); - fd = open(p.buf, def_open_flags, def_fmode); + if 
(sys->use_directio && !(idx & CACHE_VDI_BIT)) + flags |= O_DIRECT; + + fd = open(p.buf, flags, def_fmode); size = xpread(fd, buf, count, offset); if (size != count) ret = SD_RES_EIO; @@ -292,7 +298,7 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx) void *buf; if (is_vdi_obj(oid)) - data_length = sizeof(struct sheepdog_inode); + data_length = SD_INODE_SIZE; else data_length = SD_DATA_OBJ_SIZE; @@ -385,7 +391,7 @@ static int push_cache_object(uint32_t vid, uint32_t idx, int create) memset(&fake_req, 0, sizeof(fake_req)); if (is_vdi_obj(oid)) - data_length = sizeof(struct sheepdog_inode); + data_length = SD_INODE_SIZE; else data_length = SD_DATA_OBJ_SIZE; diff --git a/sheep/sheep.c b/sheep/sheep.c index b3b834b..6d64a40 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -58,7 +58,7 @@ Options:\n\ -f, --foreground make the program run in the foreground\n\ -l, --loglevel specify the level of logging detail\n\ -d, --debug include debug messages in the log\n\ - -D, --directio use direct IO when accessing the object store\n\ + -D, --directio use direct IO when accessing the object from cache or backend store\n\ -z, --zone specify the zone id\n\ -v, --vnodes specify the number of virtual nodes\n\ -c, --cluster specify the cluster driver\n\ diff --git a/sheep/store.c b/sheep/store.c index 9294899..26b1ee3 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -1269,20 +1269,6 @@ static int find_tgt_node(struct sd_vnode *old_entry, return -1; } -static void *alloc_buffer_for(uint64_t oid) -{ - void *buf = NULL; - - if (is_vdi_obj(oid)) - buf = xmalloc(SD_INODE_SIZE); - else if (is_vdi_attr_obj(oid)) - buf = xmalloc(SD_ATTR_OBJ_SIZE); - else - buf = xmalloc(SD_DATA_OBJ_SIZE); - - return buf; -} - static void *get_vnodes_from_epoch(int epoch, int *nr, int *copies) { int nodes_nr, len = sizeof(struct sd_vnode) * SD_MAX_VNODES; @@ -1312,16 +1298,10 @@ static int recover_object_from_replica(uint64_t oid, struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; char name[128]; 
unsigned wlen = 0, rlen; - int fd, ret; + int fd, ret = -1; void *buf; struct siocb iocb = { 0 }; - buf = alloc_buffer_for(oid); - if (!buf) { - eprintf("out of memory\n"); - return -1; - } - if (is_vdi_obj(oid)) rlen = SD_INODE_SIZE; else if (is_vdi_attr_obj(oid)) @@ -1329,6 +1309,12 @@ static int recover_object_from_replica(uint64_t oid, else rlen = SD_DATA_OBJ_SIZE; + buf = valloc(rlen); + if (!buf) { + eprintf("%m\n"); + goto out; + } + if (is_myself(entry->addr, entry->port)) { iocb.epoch = epoch; iocb.length = rlen; -- 1.7.8.2 |