[Sheepdog] [PATCH 2/2] object cache: introduce async flush

Tue Apr 3 07:47:21 CEST 2012

On 04/02/2012 04:21 PM, Liu Yuan wrote:

> From: Liu Yuan <tailai.ly at taobao.com>
> 
> We flush dirty objects asynchronously by default to achieve the best
> performance. Users who prefer strong consistency over performance can
> launch sheep with the -S or --sync option.
> 
> We need async flush because:
> 	1) Some applications are response-time (RT) sensitive; the writeback of dirty
> 	bits in the guest will mostly hurt RT because the guest needs to wait for its
> 	completion. This is a considerably long operation in the sheep cluster.
> 	2) Some applications are just memory and CPU intensive and have little concern
> 	for disk data (e.g., they only use the disk to store application logs).
> 	3) People simply prefer performance over consistency.
> 
> Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
> ---
>  sheep/object_cache.c |   23 +++++++++++++++++++++++
>  sheep/ops.c          |   34 ++++++++++++++++++++++++++++++++--
>  sheep/sheep.c        |    9 ++++++++-
>  sheep/sheep_priv.h   |    2 ++
>  4 files changed, 65 insertions(+), 3 deletions(-)
> 
> diff --git a/sheep/object_cache.c b/sheep/object_cache.c
> index 389dc6d..ed10b86 100644
> --- a/sheep/object_cache.c
> +++ b/sheep/object_cache.c
> @@ -203,9 +203,20 @@ static int write_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t coun
>  		flags |= O_DIRECT;
>  
>  	fd = open(p.buf, flags, def_fmode);
> +	if (flock(fd, LOCK_EX) < 0) {
> +		ret = SD_RES_EIO;
> +		eprintf("%m\n");
> +		goto out;
> +	}
>  	size = xpwrite(fd, buf, count, offset);
> +	if (flock(fd, LOCK_UN) < 0) {
> +		ret = SD_RES_EIO;
> +		eprintf("%m\n");
> +		goto out;
> +	}
>  	if (size != count)
>  		ret = SD_RES_EIO;
> +out:
>  	close(fd);
>  	strbuf_release(&p);
>  	return ret;
> @@ -225,9 +236,21 @@ static int read_cache_object(uint32_t vid, uint32_t idx, void *buf, size_t count
>  		flags |= O_DIRECT;
>  
>  	fd = open(p.buf, flags, def_fmode);
> +	if (flock(fd, LOCK_SH) < 0) {
> +		ret = SD_RES_EIO;
> +		eprintf("%m\n");
> +		goto out;
> +	}
> +
>  	size = xpread(fd, buf, count, offset);
> +	if (flock(fd, LOCK_UN) < 0) {
> +		ret = SD_RES_EIO;
> +		eprintf("%m\n");
> +		goto out;
> +	}
>  	if (size != count)
>  		ret = SD_RES_EIO;
> +out:
>  	close(fd);
>  	strbuf_release(&p);
>  	return ret;
> diff --git a/sheep/ops.c b/sheep/ops.c
> index bedafbe..7fd5a18 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -51,6 +51,11 @@ struct sd_op_template {
>  	int (*process_main)(const struct sd_req *req, struct sd_rsp *rsp, void *data);
>  };
>  
> +struct flush_work {
> +	struct object_cache *cache;
> +	struct work work;
> +};
> +
>  static int cluster_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
>  			   void *data)
>  {
> @@ -476,6 +481,22 @@ static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
>  	return ret;
>  }
>  
> +static void flush_vdi_fn(struct work *work)
> +{
> +	struct flush_work *fw = container_of(work, struct flush_work, work);
> +
> +	dprintf("flush vdi %"PRIx32"\n", fw->cache->vid);
> +	if (object_cache_push(fw->cache) != SD_RES_SUCCESS)
> +		eprintf("failed to flush vdi %"PRIx32"\n", fw->cache->vid);
> +}
> +
> +static void flush_vdi_done(struct work *work)
> +{
> +	struct flush_work *fw = container_of(work, struct flush_work, work);
> +	dprintf("flush vdi %"PRIx32" done\n", fw->cache->vid);
> +	free(fw);
> +}
> +
>  static int local_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *data)
>  {
>  	struct sd_obj_req *hdr = (struct sd_obj_req *)req;
> @@ -483,8 +504,17 @@ static int local_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *d
>  	uint32_t vid = oid_to_vid(oid);
>  	struct object_cache *cache = find_object_cache(vid, 0);
>  
> -	if (cache)
> -		return object_cache_push(cache);
> +	if (cache) {
> +		if (sys->sync_flush)
> +			return object_cache_push(cache);
> +		else {
> +			struct flush_work *fw = xmalloc(sizeof(*fw));
> +			fw->work.fn = flush_vdi_fn;
> +			fw->work.done = flush_vdi_done;
> +			fw->cache = cache;
> +			queue_work(sys->flush_wqueue, &fw->work);
> +		}
> +	}
>  
>  	return SD_RES_SUCCESS;
>  }
> diff --git a/sheep/sheep.c b/sheep/sheep.c
> index 6d64a40..ef28f87 100644
> --- a/sheep/sheep.c
> +++ b/sheep/sheep.c
> @@ -59,6 +59,7 @@ Options:\n\
>    -l, --loglevel          specify the level of logging detail\n\
>    -d, --debug             include debug messages in the log\n\
>    -D, --directio          use direct IO when accessing the object from cache or backend store\n\
> +  -S, --sync              flush the object cache synchronously\n\
>    -z, --zone              specify the zone id\n\
>    -v, --vnodes            specify the number of virtual nodes\n\
>    -c, --cluster           specify the cluster driver\n\
> @@ -132,6 +133,10 @@ int main(int argc, char **argv)
>  			dprintf("direct IO mode\n");
>  			sys->use_directio = 1;
>  			break;
> +		case 'S':
> +			dprintf("sync flush\n");
> +			sys->sync_flush = 1;
> +			break;
>  		case 'z':
>  			zone = strtol(optarg, &p, 10);
>  			if (optarg == p || zone < 0 || UINT32_MAX < zone) {
> @@ -215,8 +220,10 @@ int main(int argc, char **argv)
>  	sys->io_wqueue = init_work_queue(NR_IO_WORKER_THREAD);
>  	sys->recovery_wqueue = init_work_queue(1);
>  	sys->deletion_wqueue = init_work_queue(1);
> +	sys->flush_wqueue = init_work_queue(1);
>  	if (!sys->cpg_wqueue || !sys->gateway_wqueue || !sys->io_wqueue ||
> -	    !sys->recovery_wqueue || !sys->deletion_wqueue)
> +	    !sys->recovery_wqueue || !sys->deletion_wqueue ||
> +	    !sys->flush_wqueue)
>  		exit(1);
>  
>  	vprintf(SDOG_NOTICE, "sheepdog daemon (version %s) started\n", PACKAGE_VERSION);
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index a316e2c..b932e83 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -148,12 +148,14 @@ struct cluster_info {
>  	uint32_t recovered_epoch;
>  
>  	int use_directio;
> +	uint8_t sync_flush;
>  
>  	struct work_queue *cpg_wqueue;
>  	struct work_queue *gateway_wqueue;
>  	struct work_queue *io_wqueue;
>  	struct work_queue *deletion_wqueue;
>  	struct work_queue *recovery_wqueue;
> +	struct work_queue *flush_wqueue;
>  };
>  
>  struct siocb {

Applied.

Thanks,
Yuan