[Sheepdog] [PATCH v2 1/2] object cache: enable direct IO for cache object

Liu Yuan namei.unix at gmail.com
Mon Apr 2 10:21:10 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

When sheep is launched with the '-D' or '--directio' option, direct IO
is now used for the object cache as well as for the backend store.

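- By default (without '-D'), the object cache now uses both the host page
  cache and the disk write-back cache (if any), since O_DSYNC is dropped
  from the default open flags. This gives the best performance, at the
  cost of greedily using as much host memory as is available.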

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/object_cache.c |   20 +++++++++++++-------
 sheep/sheep.c        |    2 +-
 sheep/store.c        |   28 +++++++---------------------
 3 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index ef2c05c..389dc6d 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -30,7 +30,7 @@
 #define HASH_SIZE	(1 << HASH_BITS)
 
 static char cache_dir[PATH_MAX];
-static int def_open_flags = O_DSYNC | O_RDWR;
+static int def_open_flags = O_RDWR;
 extern mode_t def_fmode;
 extern mode_t def_dmode;
 extern struct store_driver *sd_store;
@@ -192,14 +192,17 @@ out:
 static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset)
 {
 	size_t size;
-	int fd, ret = SD_RES_SUCCESS;
+	int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
 	struct strbuf p;
 
 	strbuf_init(&p, PATH_MAX);
 	strbuf_addstr(&p, cache_dir);
 	strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx);
 
-	fd = open(p.buf, def_open_flags, def_fmode);
+	if (sys->use_directio && !(idx & CACHE_VDI_BIT))
+		flags |= O_DIRECT;
+
+	fd = open(p.buf, flags, def_fmode);
 	size = xpwrite(fd, buf, count, offset);
 	if (size != count)
 		ret = SD_RES_EIO;
@@ -211,14 +214,17 @@ static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t coun
 static int read_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset)
 {
 	size_t size;
-	int fd, ret = SD_RES_SUCCESS;
+	int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
 	struct strbuf p;
 
 	strbuf_init(&p, PATH_MAX);
 	strbuf_addstr(&p, cache_dir);
 	strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx);
 
-	fd = open(p.buf, def_open_flags, def_fmode);
+	if (sys->use_directio && !(idx & CACHE_VDI_BIT))
+		flags |= O_DIRECT;
+
+	fd = open(p.buf, flags, def_fmode);
 	size = xpread(fd, buf, count, offset);
 	if (size != count)
 		ret = SD_RES_EIO;
@@ -292,7 +298,7 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx)
 	void *buf;
 
 	if (is_vdi_obj(oid))
-		data_length = sizeof(struct sheepdog_inode);
+		data_length = SD_INODE_SIZE;
 	else
 		data_length = SD_DATA_OBJ_SIZE;
 
@@ -385,7 +391,7 @@ static int push_cache_object(uint32_t vid, uint32_t idx, int create)
 
 	memset(&fake_req, 0, sizeof(fake_req));
 	if (is_vdi_obj(oid))
-		data_length = sizeof(struct sheepdog_inode);
+		data_length = SD_INODE_SIZE;
 	else
 		data_length = SD_DATA_OBJ_SIZE;
 
diff --git a/sheep/sheep.c b/sheep/sheep.c
index b3b834b..6d64a40 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -58,7 +58,7 @@ Options:\n\
   -f, --foreground        make the program run in the foreground\n\
   -l, --loglevel          specify the level of logging detail\n\
   -d, --debug             include debug messages in the log\n\
-  -D, --directio          use direct IO when accessing the object store\n\
+  -D, --directio          use direct IO when accessing the object from cache or backend store\n\
   -z, --zone              specify the zone id\n\
   -v, --vnodes            specify the number of virtual nodes\n\
   -c, --cluster           specify the cluster driver\n\
diff --git a/sheep/store.c b/sheep/store.c
index 9294899..26b1ee3 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1269,20 +1269,6 @@ static int find_tgt_node(struct sd_vnode *old_entry,
 	return -1;
 }
 
-static void *alloc_buffer_for(uint64_t oid)
-{
-	void *buf = NULL;
-
-	if (is_vdi_obj(oid))
-		buf = xmalloc(SD_INODE_SIZE);
-	else if (is_vdi_attr_obj(oid))
-		buf = xmalloc(SD_ATTR_OBJ_SIZE);
-	else
-		buf = xmalloc(SD_DATA_OBJ_SIZE);
-
-	return buf;
-}
-
 static void *get_vnodes_from_epoch(int epoch, int *nr, int *copies)
 {
 	int nodes_nr, len = sizeof(struct sd_vnode) * SD_MAX_VNODES;
@@ -1312,16 +1298,10 @@ static int recover_object_from_replica(uint64_t oid,
 	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
 	char name[128];
 	unsigned wlen = 0, rlen;
-	int fd, ret;
+	int fd, ret = -1;
 	void *buf;
 	struct siocb iocb = { 0 };
 
-	buf = alloc_buffer_for(oid);
-	if (!buf) {
-		eprintf("out of memory\n");
-		return -1;
-	}
-
 	if (is_vdi_obj(oid))
 		rlen = SD_INODE_SIZE;
 	else if (is_vdi_attr_obj(oid))
@@ -1329,6 +1309,12 @@ static int recover_object_from_replica(uint64_t oid,
 	else
 		rlen = SD_DATA_OBJ_SIZE;
 
+	buf = valloc(rlen);
+	if (!buf) {
+		eprintf("%m\n");
+		goto out;
+	}
+
 	if (is_myself(entry->addr, entry->port)) {
 		iocb.epoch = epoch;
 		iocb.length = rlen;
-- 
1.7.8.2




More information about the sheepdog mailing list