[Sheepdog] [PATCH] object cache: enable direct IO for cache object

MORITA Kazutaka morita.kazutaka at gmail.com
Sun Apr 1 14:56:42 CEST 2012


At Sun,  1 Apr 2012 19:51:41 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly at taobao.com>
> 
> When sheep is launched with '-D' or '--directio' option, we will use
> direct IO for cache object too.

If you don't want to use the page cache for sheepdog object caches, how
about making O_DIRECT the default?  As I think you know, O_DSYNC doesn't
mean that sheep doesn't use the page cache.

> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  sheep/object_cache.c |   18 ++++++++++++------
>  sheep/store.c        |   28 +++++++---------------------
>  2 files changed, 19 insertions(+), 27 deletions(-)

This patch uses (O_DIRECT|O_DSYNC) for data object cache I/Os.
Is there any reason you want to add O_DSYNC to O_DIRECT?  As I said
before, we don't need to flush cache data to disks.

In addition, I'd like to remove the O_DSYNC flag from vdi object cache
I/Os.  It would boost the performance of metadata updates.

So my suggestion is:
 - (O_DIRECT|O_RDWR) for data object cache I/Os
 - O_RDWR for vdi object cache I/Os


Thanks,

Kazutaka

> 
> diff --git a/sheep/object_cache.c b/sheep/object_cache.c
> index ef2c05c..94cb4eb 100644
> --- a/sheep/object_cache.c
> +++ b/sheep/object_cache.c
> @@ -192,14 +192,17 @@ out:
>  static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset)
>  {
>  	size_t size;
> -	int fd, ret = SD_RES_SUCCESS;
> +	int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
>  	struct strbuf p;
>  
>  	strbuf_init(&p, PATH_MAX);
>  	strbuf_addstr(&p, cache_dir);
>  	strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx);
>  
> -	fd = open(p.buf, def_open_flags, def_fmode);
> +	if (sys->use_directio && !(idx & CACHE_VDI_BIT))
> +		flags |= O_DIRECT;
> +
> +	fd = open(p.buf, flags, def_fmode);
>  	size = xpwrite(fd, buf, count, offset);
>  	if (size != count)
>  		ret = SD_RES_EIO;
> @@ -211,14 +214,17 @@ static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t coun
>  static int read_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count, off_t offset)
>  {
>  	size_t size;
> -	int fd, ret = SD_RES_SUCCESS;
> +	int fd, flags = def_open_flags, ret = SD_RES_SUCCESS;
>  	struct strbuf p;
>  
>  	strbuf_init(&p, PATH_MAX);
>  	strbuf_addstr(&p, cache_dir);
>  	strbuf_addf(&p, "/%06"PRIx32"/%08"PRIx32, vid, idx);
>  
> -	fd = open(p.buf, def_open_flags, def_fmode);
> +	if (sys->use_directio && !(idx & CACHE_VDI_BIT))
> +		flags |= O_DIRECT;
> +
> +	fd = open(p.buf, flags, def_fmode);
>  	size = xpread(fd, buf, count, offset);
>  	if (size != count)
>  		ret = SD_RES_EIO;
> @@ -292,7 +298,7 @@ int object_cache_pull(struct object_cache *oc, uint32_t idx)
>  	void *buf;
>  
>  	if (is_vdi_obj(oid))
> -		data_length = sizeof(struct sheepdog_inode);
> +		data_length = SD_INODE_SIZE;
>  	else
>  		data_length = SD_DATA_OBJ_SIZE;
>  
> @@ -385,7 +391,7 @@ static int push_cache_object(uint32_t vid, uint32_t idx, int create)
>  
>  	memset(&fake_req, 0, sizeof(fake_req));
>  	if (is_vdi_obj(oid))
> -		data_length = sizeof(struct sheepdog_inode);
> +		data_length = SD_INODE_SIZE;
>  	else
>  		data_length = SD_DATA_OBJ_SIZE;
>  
> diff --git a/sheep/store.c b/sheep/store.c
> index 9294899..26b1ee3 100644
> --- a/sheep/store.c
> +++ b/sheep/store.c
> @@ -1269,20 +1269,6 @@ static int find_tgt_node(struct sd_vnode *old_entry,
>  	return -1;
>  }
>  
> -static void *alloc_buffer_for(uint64_t oid)
> -{
> -	void *buf = NULL;
> -
> -	if (is_vdi_obj(oid))
> -		buf = xmalloc(SD_INODE_SIZE);
> -	else if (is_vdi_attr_obj(oid))
> -		buf = xmalloc(SD_ATTR_OBJ_SIZE);
> -	else
> -		buf = xmalloc(SD_DATA_OBJ_SIZE);
> -
> -	return buf;
> -}
> -
>  static void *get_vnodes_from_epoch(int epoch, int *nr, int *copies)
>  {
>  	int nodes_nr, len = sizeof(struct sd_vnode) * SD_MAX_VNODES;
> @@ -1312,16 +1298,10 @@ static int recover_object_from_replica(uint64_t oid,
>  	struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr;
>  	char name[128];
>  	unsigned wlen = 0, rlen;
> -	int fd, ret;
> +	int fd, ret = -1;
>  	void *buf;
>  	struct siocb iocb = { 0 };
>  
> -	buf = alloc_buffer_for(oid);
> -	if (!buf) {
> -		eprintf("out of memory\n");
> -		return -1;
> -	}
> -
>  	if (is_vdi_obj(oid))
>  		rlen = SD_INODE_SIZE;
>  	else if (is_vdi_attr_obj(oid))
> @@ -1329,6 +1309,12 @@ static int recover_object_from_replica(uint64_t oid,
>  	else
>  		rlen = SD_DATA_OBJ_SIZE;
>  
> +	buf = valloc(rlen);
> +	if (!buf) {
> +		eprintf("%m\n");
> +		goto out;
> +	}
> +
>  	if (is_myself(entry->addr, entry->port)) {
>  		iocb.epoch = epoch;
>  		iocb.length = rlen;
> -- 
> 1.7.8.2
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list