[sheepdog] [PATCH] sheep: remove disk write cache

MORITA Kazutaka morita.kazutaka at gmail.com
Sun Mar 3 15:16:59 CET 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

This removes disk cache from 'w' option because it shows poor
performance against writethrough I/O requests because of many syncfs
calls.  Currently, object cache doesn't break block semantic against
live migration and VM shutdown, so it will be fit for most users.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/gateway.c      |    2 +-
 sheep/object_cache.c |    2 +-
 sheep/ops.c          |   13 +++-------
 sheep/plain_store.c  |    6 ++---
 sheep/request.c      |    2 +-
 sheep/sheep.c        |   66 ++++++++++----------------------------------------
 sheep/sheep_priv.h   |   14 +----------
 sheep/store.c        |    4 +--
 tests/018            |    2 +-
 tests/019            |    2 +-
 tests/020            |    2 +-
 tests/044            |    2 +-
 tests/049            |    4 +--
 13 files changed, 30 insertions(+), 91 deletions(-)

diff --git a/sheep/gateway.c b/sheep/gateway.c
index 71f87e4..acc03c0 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -39,7 +39,7 @@ int gateway_read_obj(struct request *req)
 	uint64_t oid = req->rq.obj.oid;
 	int nr_copies, j;
 
-	if (is_object_cache_enabled() && !req->local &&
+	if (sys->enable_object_cache && !req->local &&
 	    !bypass_object_cache(req)) {
 		ret = object_cache_handle_request(req);
 		goto out;
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 214ce1c..0781c28 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -1006,7 +1006,7 @@ bool bypass_object_cache(const struct request *req)
 {
 	uint64_t oid = req->rq.obj.oid;
 
-	if (!is_object_cache_enabled() || req->local)
+	if (!sys->enable_object_cache || req->local)
 		return true;
 
 	if (req->rq.flags & SD_FLAG_CMD_DIRECT) {
diff --git a/sheep/ops.c b/sheep/ops.c
index ef74871..891231f 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -187,7 +187,7 @@ static int post_cluster_del_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 	struct cache_deletion_work *dw;
 	int ret = rsp->result;
 
-	if (!is_object_cache_enabled())
+	if (!sys->enable_object_cache)
 		return ret;
 
 	dw = xzalloc(sizeof(*dw));
@@ -700,26 +700,19 @@ static int local_flush_vdi(struct request *req)
 {
 	int ret = SD_RES_INVALID_PARMS;
 
-	if (is_object_cache_enabled()) {
+	if (sys->enable_object_cache) {
 		uint32_t vid = oid_to_vid(req->rq.obj.oid);
 		ret = object_cache_flush_vdi(vid);
 		if (ret != SD_RES_SUCCESS)
 			return ret;
 	}
 
-	if (is_disk_cache_enabled()) {
-		struct sd_req hdr;
-
-		sd_init_req(&hdr, SD_OP_FLUSH_NODES);
-		return exec_local_req(&hdr, NULL);
-	}
-
 	return ret;
 }
 
 static int local_flush_and_del(struct request *req)
 {
-	if (!is_object_cache_enabled())
+	if (!sys->enable_object_cache)
 		return SD_RES_SUCCESS;
 	return object_cache_flush_and_del(req);
 }
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 3ef22eb..9580318 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -24,9 +24,7 @@ static int get_open_flags(uint64_t oid, bool create, int fl)
 {
 	int flags = O_DSYNC | O_RDWR;
 
-	if ((fl & SD_FLAG_CMD_CACHE && is_disk_cache_enabled()) ||
-	    uatomic_is_true(&sys->use_journal) ||
-	    sys->nosync == true)
+	if (uatomic_is_true(&sys->use_journal) || sys->nosync == true)
 		flags &= ~O_DSYNC;
 
 	/*
@@ -475,7 +473,7 @@ int default_format(void)
 		sd_eprintf("%m");
 		return SD_RES_EIO;
 	}
-	if (is_object_cache_enabled())
+	if (sys->enable_object_cache)
 		object_cache_format();
 
 	return SD_RES_SUCCESS;
diff --git a/sheep/request.c b/sheep/request.c
index 6821167..3ebdff7 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -297,7 +297,7 @@ static void queue_gateway_request(struct request *req)
 	 * Even if it doesn't exist in cache, we'll rely on cache layer to pull
 	 * it.
 	 */
-	if (is_object_cache_enabled())
+	if (sys->enable_object_cache)
 		goto queue_work;
 
 	if (req->local_oid)
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 12be4cb..fab0c81 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -59,7 +59,7 @@ static struct sd_option sheep_options[] = {
 	{'s', "disk-space", true, "specify the free disk space in megabytes"},
 	{'u', "upgrade", false, "upgrade to the latest data layout"},
 	{'v', "version", false, "show the version"},
-	{'w', "write-cache", true, "specify the cache type"},
+	{'w', "enable-cache", true, "enable object cache"},
 	{'y', "myaddr", true, "specify the address advertised to other sheep"},
 	{'z', "zone", true, "specify the zone id"},
 	{ 0, NULL, false, NULL },
@@ -240,7 +240,6 @@ static void object_cache_dir_set(char *s)
 static void _object_cache_set(char *s)
 {
 	int i;
-	static bool first = true;
 
 	struct object_cache_arg {
 		const char *name;
@@ -254,12 +253,6 @@ static void _object_cache_set(char *s)
 		{ NULL, NULL },
 	};
 
-	if (first) {
-		assert(!strcmp(s, "object"));
-		first = false;
-		return;
-	}
-
 	for (i = 0; object_cache_args[i].name; i++) {
 		const char *n = object_cache_args[i].name;
 
@@ -273,52 +266,19 @@ static void _object_cache_set(char *s)
 	exit(1);
 }
 
-static void object_cache_set(char *s)
-{
-	sys->enabled_cache_type |= CACHE_TYPE_OBJECT;
-	parse_arg(s, ":", _object_cache_set);
-}
-
-static void disk_cache_set(char *s)
-{
-	assert(!strcmp(s, "disk"));
-	sys->enabled_cache_type |= CACHE_TYPE_DISK;
-}
-
-static void do_cache_type(char *s)
-{
-	int i;
-
-	struct cache_type {
-		const char *name;
-		void (*set)(char *);
-	};
-	struct cache_type cache_types[] = {
-		{ "object", object_cache_set },
-		{ "disk", disk_cache_set },
-		{ NULL, NULL },
-	};
-
-	for (i = 0; cache_types[i].name; i++) {
-		const char *n = cache_types[i].name;
-
-		if (!strncmp(s, n, strlen(n))) {
-			cache_types[i].set(s);
-			return;
-		}
-	}
-
-	fprintf(stderr, "invalid cache type: %s\n", s);
-	exit(1);
-}
-
-static void init_cache_type(char *arg)
+static void object_cache_set(char *arg)
 {
+	const char prefix[] = "object:";
+	sys->enable_object_cache = true;
 	sys->object_cache_size = 0;
 
-	parse_arg(arg, ",", do_cache_type);
+	/* if arg has "object:" prefix, remove it for backward compatibility. */
+	if (strncmp(arg, prefix, sizeof(prefix) - 1) == 0)
+		arg += sizeof(prefix) - 1;
+
+	parse_arg(arg, ",", _object_cache_set);
 
-	if (is_object_cache_enabled() && sys->object_cache_size == 0) {
+	if (sys->object_cache_size == 0) {
 		fprintf(stderr, "object cache size is not set\n");
 		exit(1);
 	}
@@ -385,7 +345,7 @@ static int init_work_queues(void)
 	sys->deletion_wqueue = init_ordered_work_queue("deletion");
 	sys->block_wqueue = init_ordered_work_queue("block");
 	sys->sockfd_wqueue = init_ordered_work_queue("sockfd");
-	if (is_object_cache_enabled()) {
+	if (sys->enable_object_cache) {
 		sys->oc_reclaim_wqueue = init_ordered_work_queue("oc_reclaim");
 		sys->oc_push_wqueue = init_work_queue("oc_push", -1);
 		if (!sys->oc_reclaim_wqueue || !sys->oc_push_wqueue)
@@ -542,7 +502,7 @@ int main(int argc, char **argv)
 			sys->cdrv_option = get_cdrv_option(sys->cdrv, optarg);
 			break;
 		case 'w':
-			init_cache_type(optarg);
+			object_cache_set(optarg);
 			break;
 		case 'i':
 			parse_arg(optarg, ",", init_io_arg);
@@ -692,7 +652,7 @@ int main(int argc, char **argv)
 			exit(1);
 	}
 
-	if (is_object_cache_enabled()) {
+	if (sys->enable_object_cache) {
 		if (!strlen(ocpath))
 			/* use object cache internally */
 			memcpy(ocpath, dir, strlen(dir));
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index c0fefb4..b99a48a 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -117,9 +117,7 @@ struct cluster_info {
 	struct work_queue *oc_reclaim_wqueue;
 	struct work_queue *oc_push_wqueue;
 
-#define CACHE_TYPE_OBJECT 0x1
-#define CACHE_TYPE_DISK   0x2
-	int enabled_cache_type;
+	bool enable_object_cache;
 
 	uint32_t object_cache_size;
 	bool object_cache_directio;
@@ -416,16 +414,6 @@ void sheep_del_sockfd(const struct node_id *, struct sockfd *);
 int sheep_exec_req(const struct node_id *nid, struct sd_req *hdr, void *data);
 bool sheep_need_retry(uint32_t epoch);
 
-static inline bool is_object_cache_enabled(void)
-{
-	return !!(sys->enabled_cache_type & CACHE_TYPE_OBJECT);
-}
-
-static inline bool is_disk_cache_enabled(void)
-{
-	return !!(sys->enabled_cache_type & CACHE_TYPE_DISK);
-}
-
 /* journal_file.c */
 int journal_file_init(const char *path, size_t size, bool skip);
 int journal_file_write(uint64_t oid, const char *buf, size_t size, off_t, bool);
diff --git a/sheep/store.c b/sheep/store.c
index dbc687b..2a4301a 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -364,7 +364,7 @@ int write_object(uint64_t oid, char *data, unsigned int datalen,
 	struct sd_req hdr;
 	int ret;
 
-	if (is_object_cache_enabled() && object_is_cached(oid)) {
+	if (sys->enable_object_cache && object_is_cached(oid)) {
 		ret = object_cache_write(oid, data, datalen, offset,
 					 flags, create);
 		if (ret == SD_RES_NO_CACHE)
@@ -427,7 +427,7 @@ int read_object(uint64_t oid, char *data, unsigned int datalen,
 {
 	int ret;
 
-	if (is_object_cache_enabled() && object_is_cached(oid)) {
+	if (sys->enable_object_cache && object_is_cached(oid)) {
 		ret = object_cache_read(oid, data, datalen, offset);
 		if (ret != SD_RES_SUCCESS) {
 			sd_eprintf("try forward read %"PRIx64" %"PRIx32, oid,
diff --git a/tests/018 b/tests/018
index cd2cfd7..d8af69b 100755
--- a/tests/018
+++ b/tests/018
@@ -16,7 +16,7 @@ status=1        # failure is the default!
 _cleanup
 
 for i in `seq 0 2`; do
-    _start_sheep $i "-w object:size=100"
+    _start_sheep $i "-w size=100"
 done
 
 _wait_for_sheep "3"
diff --git a/tests/019 b/tests/019
index f3862fb..98fc2b8 100755
--- a/tests/019
+++ b/tests/019
@@ -16,7 +16,7 @@ status=1        # failure is the default!
 _cleanup
 
 for i in `seq 0 2`; do
-    _start_sheep $i "-w object:size=100"
+    _start_sheep $i "-w size=100"
 done
 
 _wait_for_sheep "3"
diff --git a/tests/020 b/tests/020
index e9943dd..a5fccf8 100755
--- a/tests/020
+++ b/tests/020
@@ -16,7 +16,7 @@ status=1        # failure is the default!
 _cleanup
 
 for i in `seq 0 2`; do
-    _start_sheep $i "-w object:size=20"
+    _start_sheep $i "-w size=20"
 done
 
 _wait_for_sheep "3"
diff --git a/tests/044 b/tests/044
index dcb8be3..a774bb5 100755
--- a/tests/044
+++ b/tests/044
@@ -20,7 +20,7 @@ fi
 _cleanup
 
 for i in 0 1 2; do
-    _start_sheep $i '-s 4096 -w object:size=1000'
+    _start_sheep $i '-s 4096 -w size=1000'
 done
 
 _wait_for_sheep 3
diff --git a/tests/049 b/tests/049
index 79b9bd7..184b3a6 100755
--- a/tests/049
+++ b/tests/049
@@ -16,7 +16,7 @@ status=1        # failure is the default!
 _cleanup
 
 for i in `seq 0 2`; do
-    _start_sheep $i "-w object:size=30"
+    _start_sheep $i "-w size=30"
 done
 
 _wait_for_sheep 3
@@ -32,7 +32,7 @@ sleep 2
 
 #trigger an object reclaim at startup
 for i in `seq 0 2`; do
-    _start_sheep $i "-w object:size=10"
+    _start_sheep $i "-w size=10"
 done
 
 _wait_for_sheep 3
-- 
1.7.9.5




More information about the sheepdog mailing list