[sheepdog] [PATCH v3 3/4] sheep: add SD_OP_SYNC_VDI and SD_OP_FLUSH_PEER for writeback cache semantics

Hitoshi Mitake h.mitake at gmail.com
Wed Sep 5 16:31:04 CEST 2012


8<---
v3: move conditional branch on sys->gateway_only from default_flush() to
peer_flush(), based on Liu Yuan's advice
8<---

This patch adds two new internal sheep operations, SD_OP_SYNC_VDI and
SD_OP_FLUSH_PEER, for implementing writeback cache semantics in backend stores.

If writeback cache semantics are used in backend stores, an explicit
flush on all sheep is required when the gateway sheep receives SD_OP_FLUSH_VDI.

After applying this patch, SD_OP_SYNC_VDI will be queued as a gateway
request when a sheep receives SD_OP_FLUSH_VDI. SD_OP_SYNC_VDI forwards
SD_OP_FLUSH_PEER to all other sheep. After receiving
SD_OP_FLUSH_PEER, each sheep flushes the cache of its backend store.

This patch also modifies the command line options of sheep. Previously, -w was
used for enabling the object cache and specifying its size. After applying this
patch, -w is also used for enabling writeback cache semantics in
backend stores. Examples of the new -w syntax:
-w disk ... enable writeback cache semantics of disks
-w disk,object:50 ... enable writeback cache semantics of disks, and
enable object cache with 50MB memory
-w object:50 ... enable object cache with 50MB memory

Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
Cc: Liu Yuan <tailai.ly at taobao.com>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 include/internal_proto.h |    2 +
 sheep/ops.c              |   39 +++++++++++++++++-
 sheep/plain_store.c      |    5 +-
 sheep/sheep.c            |   97 ++++++++++++++++++++++++++++++++++++---------
 sheep/sheep_priv.h       |    7 +++-
 sheep/store.c            |    5 +-
 6 files changed, 125 insertions(+), 30 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index 5288823..06f74fa 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -65,6 +65,8 @@
 #define SD_OP_INFO_RECOVER 0xAA
 #define SD_OP_GET_VDI_COPIES 0xAB
 #define SD_OP_COMPLETE_RECOVERY 0xAC
+#define SD_OP_SYNC_VDI 0xAD
+#define SD_OP_FLUSH_PEER 0xAE
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index 465d73f..8a527e6 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -650,9 +650,22 @@ static int local_get_snap_file(struct request *req)
 
 static int local_flush_vdi(struct request *req)
 {
-	if (!sys->enable_write_cache)
-		return SD_RES_SUCCESS;
-	return object_cache_flush_vdi(req);
+	int ret = SD_RES_SUCCESS;
+
+	if (sys->enable_write_cache) {
+		ret = object_cache_flush_vdi(req);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
+
+	if (sys->store_writeback) {
+		struct sd_req hdr;
+
+		sd_init_req(&hdr, SD_OP_SYNC_VDI);
+		return exec_local_req(&hdr, NULL);
+	}
+
+	return ret;
 }
 
 static int local_flush_and_del(struct request *req)
@@ -913,6 +926,14 @@ out:
 	return ret;
 }
 
+int peer_flush(struct request *req)
+{
+	if (sys->gateway_only)
+		return SD_RES_SUCCESS;
+
+	return sd_store->flush();
+}
+
 static struct sd_op_template sd_ops[] = {
 
 	/* cluster operations */
@@ -1179,6 +1200,17 @@ static struct sd_op_template sd_ops[] = {
 		.type = SD_OP_TYPE_LOCAL,
 		.process_main = local_info_recover,
 	},
+
+	[SD_OP_FLUSH_PEER] = {
+		.name = "FLUSH_PEER",
+		.type = SD_OP_TYPE_PEER,
+		.process_work = peer_flush,
+	},
+	[SD_OP_SYNC_VDI] = {
+		.name = "SYNC_VDI",
+		.type = SD_OP_TYPE_GATEWAY,
+		.process_work = gateway_sync_vdi,
+	},
 };
 
 struct sd_op_template *get_sd_op(uint8_t opcode)
@@ -1264,6 +1296,7 @@ static int map_table[] = {
 	[SD_OP_READ_OBJ] = SD_OP_READ_PEER,
 	[SD_OP_WRITE_OBJ] = SD_OP_WRITE_PEER,
 	[SD_OP_REMOVE_OBJ] = SD_OP_REMOVE_PEER,
+	[SD_OP_SYNC_VDI] = SD_OP_FLUSH_PEER,
 };
 
 int gateway_to_peer_opcode(int opcode)
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 036812d..cd41ed0 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -130,6 +130,8 @@ int default_write(uint64_t oid, struct siocb *iocb, int create)
 	}
 
 	get_obj_path(oid, path);
+	if (iocb->flags & SD_FLAG_CMD_CACHE && sys->store_writeback)
+		flags &= ~O_DSYNC;
 	fd = open(path, flags, def_fmode);
 	if (fd < 0)
 		return err_to_sderr(oid, errno);
@@ -436,9 +438,6 @@ int default_flush(void)
 {
 	int fd;
 
-	if (sys->gateway_only)
-		return SD_RES_SUCCESS;
-
 	fd = open(obj_path, O_RDONLY);
 	if (fd < 0) {
 		eprintf("error at open() %s, %s\n", obj_path, strerror(errno));
diff --git a/sheep/sheep.c b/sheep/sheep.c
index e1434cf..f960faf 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -49,9 +49,9 @@ static struct option const long_options[] = {
 	{"stdout", no_argument, NULL, 'o'},
 	{"port", required_argument, NULL, 'p'},
 	{"disk-space", required_argument, NULL, 's'},
-	{"enable-cache", required_argument, NULL, 'w'},
 	{"zone", required_argument, NULL, 'z'},
 	{"pidfile", required_argument, NULL, 'P'},
+	{"cache", required_argument, NULL, 'w'},
 	{NULL, 0, NULL, 0},
 };
 
@@ -78,9 +78,9 @@ Options:\n\
   -p, --port              specify the TCP port on which to listen\n\
   -P, --pidfile           create a pid file\n\
   -s, --disk-space        specify the free disk space in megabytes\n\
-  -w, --enable-cache      enable object cache and specify the max size (M) and mode\n\
   -y, --myaddr            specify the address advertised to other sheep\n\
   -z, --zone              specify the zone id\n\
+  -w, --cache             specify the cache type\n\
 ", PACKAGE_VERSION, program_name);
 	exit(status);
 }
@@ -178,6 +178,77 @@ static int init_signal(void)
 static struct cluster_info __sys;
 struct cluster_info *sys = &__sys;
 
+static void object_cache_set(char *s)
+{
+	const char *header = "object:";
+	int len = strlen(header);
+	char *size, *p;
+	int64_t cache_size;
+
+	if (strncmp(s, header, len))
+		goto err;
+
+	size = s + len;
+	cache_size = strtol(size, &p, 10);
+	if (size == p || cache_size < 0 || UINT64_MAX < cache_size)
+		goto err;
+
+	sys->enable_write_cache = 1;
+	sys->cache_size = cache_size * 1024 * 1024;
+
+	return;
+err:
+	fprintf(stderr, "Invalid object cache option '%s': "
+		"size must be an integer between 0 and %lu\n",
+		s, UINT64_MAX);
+	exit(1);
+}
+
+static void disk_cache_set(char *s)
+{
+	if (strcmp(s, "disk")) {
+		fprintf(stderr, "invalid disk cache option: %s\n", s);
+		exit(1);
+	}
+
+	sys->store_writeback = 1;
+}
+
+static void do_cache_mode(char *s)
+{
+	int i;
+	struct cache_mode {
+		const char *name;
+		void (*set)(char *);
+	};
+
+	struct cache_mode cache_mode_array[] = {
+		{ "object", object_cache_set },
+		{ "disk", disk_cache_set },
+		{ NULL, NULL },
+	};
+
+	for (i = 0; cache_mode_array[i].name; i++) {
+		const char *n = cache_mode_array[i].name;
+
+		if (!strncmp(s, n, strlen(n))) {
+			cache_mode_array[i].set(s);
+			return;
+		}
+	}
+
+	fprintf(stderr, "invalid cache mode: %s\n", s);
+	exit(1);
+}
+
+static void init_cache_mode(char *mode)
+{
+	char *s = strtok(mode, ",");
+	do {
+		do_cache_mode(s);
+	} while ((s = strtok(NULL, ",")));
+}
+
 int main(int argc, char **argv)
 {
 	int ch, longindex;
@@ -188,14 +259,12 @@ int main(int argc, char **argv)
 	int log_level = SDOG_INFO;
 	char path[PATH_MAX];
 	int64_t zone = -1;
-	int64_t cache_size = 0;
 	int64_t free_space = 0;
 	int nr_vnodes = SD_DEFAULT_VNODES;
 	bool explicit_addr = false;
 	int af;
 	char *p;
 	struct cluster_driver *cdrv;
-	int enable_object_cache = 0; /* disabled by default */
 	char *pid_file = NULL;
 
 	signal(SIGPIPE, SIG_IGN);
@@ -263,21 +332,6 @@ int main(int argc, char **argv)
 			}
 			sys->this_node.zone = zone;
 			break;
-		case 'w':
-			enable_object_cache = 1;
-			cache_size = strtol(optarg, &p, 10);
-			if (optarg == p || cache_size < 0 ||
-			    UINT64_MAX < cache_size) {
-				fprintf(stderr, "Invalid cache size '%s': "
-					"must be an integer between 0 and %lu\n",
-					optarg, UINT64_MAX);
-				exit(1);
-			}
-			sys->cache_size = cache_size * 1024 * 1024;
-
-			fprintf(stdout, "enable write cache, "
-				"max cache size %" PRIu64 "M\n", cache_size);
-			break;
 		case 's':
 			free_space = strtoll(optarg, &p, 10);
 			if (optarg == p || free_space <= 0 ||
@@ -303,6 +357,9 @@ int main(int argc, char **argv)
 
 			sys->cdrv_option = get_cdrv_option(sys->cdrv, optarg);
 			break;
+		case 'w':
+			init_cache_mode(optarg);
+			break;
 		case 'h':
 			usage(0);
 			break;
@@ -334,7 +391,7 @@ int main(int argc, char **argv)
 	if (ret)
 		exit(1);
 
-	ret = init_store(dir, enable_object_cache);
+	ret = init_store(dir);
 	if (ret)
 		exit(1);
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ae9ef66..72a8b42 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -119,6 +119,8 @@ struct cluster_info {
 	struct work_queue *block_wqueue;
 	struct work_queue *sockfd_wqueue;
 	struct work_queue *reclaim_wqueue;
+
+	int store_writeback;
 };
 
 struct siocb {
@@ -212,7 +214,7 @@ static inline uint32_t sys_epoch(void)
 
 int create_listen_port(int port, void *data);
 
-int init_store(const char *dir, int enable_write_cache);
+int init_store(const char *dir);
 int init_base_path(const char *dir);
 
 int fill_vdi_copy_list(void *data);
@@ -354,12 +356,15 @@ int gateway_read_obj(struct request *req);
 int gateway_write_obj(struct request *req);
 int gateway_create_and_write_obj(struct request *req);
 int gateway_remove_obj(struct request *req);
+int gateway_sync_vdi(struct request *req);
 
 /* backend store */
 int peer_read_obj(struct request *req);
 int peer_write_obj(struct request *req);
 int peer_create_and_write_obj(struct request *req);
 int peer_remove_obj(struct request *req);
+int peer_flush(struct request *req);
+
 int default_flush(void);
 
 /* object_cache */
diff --git a/sheep/store.c b/sheep/store.c
index 8326156..e1f1abe 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -480,7 +480,7 @@ out:
 	return ret;
 }
 
-int init_store(const char *d, int enable_write_cache)
+int init_store(const char *d)
 {
 	int ret;
 
@@ -514,8 +514,7 @@ int init_store(const char *d, int enable_write_cache)
 			return ret;
 	}
 
-	if (enable_write_cache) {
-		sys->enable_write_cache = 1;
+	if (sys->enable_write_cache) {
 		ret = object_cache_init(d);
 		if (ret)
 			return 1;
-- 
1.7.5.1




More information about the sheepdog mailing list