[Sheepdog] [PATCH v2 1/2] object cache: enable direct IO for cache object
Liu Yuan
namei.unix at gmail.com
Mon Apr 2 10:21:10 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
When sheep is launched with the '-D' or '--directio' option, direct IO
is used for the object cache too.
- By default, the object cache uses both the host page cache and the disk write-back cache (if any);
this gives the best performance, but greedily uses as much host memory as possible.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/object_cache.c | 20 +++++++++++++-------
sheep/sheep.c | 2 +-
sheep/store.c | 28 +++++++---------------------
3 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index ef2c05c..389dc6d 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -30,7 +30,7 @@
#define HASH_SIZE (1 << HASH_BITS)
static char cache_dir[PATH_MAX];
-static int def_open_flags = O_DSYNC | O_RDWR;
+static int def_open_flags = O_RDWR;
extern mode_t def_fmode;
extern mode_t def_dmode;
extern struct store_driver *sd_store;
@@ -192,14 +192,17 @@ out:
static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset)
{
size_t size;
- int fd, ret = SD_RES_SUCCESS;
+ int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
struct strbuf p;
strbuf_init(&p, PATH_MAX);
strbuf_addstr(&p, cache_dir);
strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx);
- fd = open(p.buf, def_open_flags, def_fmode);
+ if (sys->use_directio && !(idx & CACHE_VDI_BIT))
+ flags |= O_DIRECT;
+
+ fd = open(p.buf, flags, def_fmode);
size = xpwrite(fd, buf, count, offset);
if (size != count)
ret = SD_RES_EIO;
@@ -211,14 +214,17 @@ static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t coun
static int read_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset)
{
size_t size;
- int fd, ret = SD_RES_SUCCESS;
+ int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
struct strbuf p;
strbuf_init(&p, PATH_MAX);
strbuf_addstr(&p, cache_dir);
strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx);
- fd = open(p.buf, def_open_flags, def_fmode);
+ if (sys->use_directio && !(idx & CACHE_VDI_BIT))
+ flags |= O_DIRECT;
+
+ fd = open(p.buf, flags, def_fmode);
size = xpread(fd, buf, count, offset);
if (size != count)
ret = SD_RES_EIO;
@@ -292,7 +298,7 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx)
void *buf;
if (is_vdi_obj(oid))
- data_length = sizeof(struct sheepdog_inode);
+ data_length = SD_INODE_SIZE;
else
data_length = SD_DATA_OBJ_SIZE;
@@ -385,7 +391,7 @@ static int push_cache_object(uint32_t vid, uint32_t idx, int create)
memset(&fake_req, 0, sizeof(fake_req));
if (is_vdi_obj(oid))
- data_length = sizeof(struct sheepdog_inode);
+ data_length = SD_INODE_SIZE;
else
data_length = SD_DATA_OBJ_SIZE;
diff --git a/sheep/sheep.c b/sheep/sheep.c
index b3b834b..6d64a40 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -58,7 +58,7 @@ Options:\n\
-f, --foreground make the program run in the foreground\n\
-l, --loglevel specify the level of logging detail\n\
-d, --debug include debug messages in the log\n\
- -D, --directio use direct IO when accessing the object store\n\
+ -D, --directio use direct IO when accessing the object from cache or backend store\n\
-z, --zone specify the zone id\n\
-v, --vnodes specify the number of virtual nodes\n\
-c, --cluster specify the cluster driver\n\
diff --git a/sheep/store.c b/sheep/store.c
index 9294899..26b1ee3 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1269,20 +1269,6 @@ static int find_tgt_node(struct sd_vnode *old_entry,
return -1;
}
-static void *alloc_buffer_for(uint64_t oid)
-{
- void *buf = NULL;
-
- if (is_vdi_obj(oid))
- buf = xmalloc(SD_INODE_SIZE);
- else if (is_vdi_attr_obj(oid))
- buf = xmalloc(SD_ATTR_OBJ_SIZE);
- else
- buf = xmalloc(SD_DATA_OBJ_SIZE);
-
- return buf;
-}
-
static void *get_vnodes_from_epoch(int epoch, int *nr, int *copies)
{
int nodes_nr, len = sizeof(struct sd_vnode) * SD_MAX_VNODES;
@@ -1312,16 +1298,10 @@ static int recover_object_from_replica(uint64_t oid,
struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
char name[128];
unsigned wlen = 0, rlen;
- int fd, ret;
+ int fd, ret = -1;
void *buf;
struct siocb iocb = { 0 };
- buf = alloc_buffer_for(oid);
- if (!buf) {
- eprintf("out of memory\n");
- return -1;
- }
-
if (is_vdi_obj(oid))
rlen = SD_INODE_SIZE;
else if (is_vdi_attr_obj(oid))
@@ -1329,6 +1309,12 @@ static int recover_object_from_replica(uint64_t oid,
else
rlen = SD_DATA_OBJ_SIZE;
+ buf = valloc(rlen);
+ if (!buf) {
+ eprintf("%m\n");
+ goto out;
+ }
+
if (is_myself(entry->addr, entry->port)) {
iocb.epoch = epoch;
iocb.length = rlen;
--
1.7.8.2
More information about the sheepdog
mailing list