[sheepdog] [PATCH v3 3/4] sheep: add SD_OP_SYNC_VDI and SD_OP_FLUSH_PEER for writeback cache semantics
Hitoshi Mitake
h.mitake at gmail.com
Wed Sep 5 16:31:04 CEST 2012
8<---
v3: move conditional branch on sys->gateway_only from default_flush() to
peer_flush(), based on Liu Yuan's advice
8<---
This patch adds two new internal sheep operations, SD_OP_SYNC_VDI and
SD_OP_FLUSH_PEER, for implementing writeback cache semantics in backend stores.
If writeback cache semantics is used in backend stores, explicit
flushing in all sheeps is required when gateway sheep receives SD_OP_FLUSH_VDI.
After applying this patch, SD_OP_SYNC_VDI will be queued as a gateway
request when sheep receives SD_OP_FLUSH_VDI. SD_OP_SYNC_VDI forwards
SD_OP_FLUSH_PEER to all other sheeps. After receiving the
SD_OP_FLUSH_PEER, sheeps flush their cache of backend stores.
This patch also modifies the -w command line option of sheep. -w was used for
enabling the object cache and specifying its size. After applying this
patch, -w is also used for enabling writeback cache semantics in
backend stores. Examples of the new -w syntax:
-w disk ... enable writeback cache semantics of disks
-w disk,object:50 ... enable writeback cache semantics of disks, and
enable object cache with 50MB memory
-w object:50 ... enable object cache with 50MB memory
Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
Cc: Liu Yuan <tailai.ly at taobao.com>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
include/internal_proto.h | 2 +
sheep/ops.c | 39 +++++++++++++++++-
sheep/plain_store.c | 5 +-
sheep/sheep.c | 97 ++++++++++++++++++++++++++++++++++++---------
sheep/sheep_priv.h | 7 +++-
sheep/store.c | 5 +-
6 files changed, 125 insertions(+), 30 deletions(-)
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 5288823..06f74fa 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -65,6 +65,8 @@
#define SD_OP_INFO_RECOVER 0xAA
#define SD_OP_GET_VDI_COPIES 0xAB
#define SD_OP_COMPLETE_RECOVERY 0xAC
+#define SD_OP_SYNC_VDI 0xAD
+#define SD_OP_FLUSH_PEER 0xAE
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index 465d73f..8a527e6 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -650,9 +650,22 @@ static int local_get_snap_file(struct request *req)
static int local_flush_vdi(struct request *req)
{
- if (!sys->enable_write_cache)
- return SD_RES_SUCCESS;
- return object_cache_flush_vdi(req);
+	struct sd_req hdr;
+	int ret;
+
+	/* Flush the object cache first; give up on the first failure. */
+	if (sys->enable_write_cache) {
+		ret = object_cache_flush_vdi(req);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
+
+	/* Nothing more to do unless backend stores run in writeback mode. */
+	if (!sys->store_writeback)
+		return SD_RES_SUCCESS;
+
+	/* Issue SD_OP_SYNC_VDI locally and report its result. */
+	sd_init_req(&hdr, SD_OP_SYNC_VDI);
+	return exec_local_req(&hdr, NULL);
}
static int local_flush_and_del(struct request *req)
@@ -913,6 +926,14 @@ out:
return ret;
}
+/*
+ * Handler for SD_OP_FLUSH_PEER: flush the backend store.
+ *
+ * Returns SD_RES_SUCCESS without calling the store when
+ * sys->gateway_only is set; otherwise returns whatever
+ * sd_store->flush() returns.
+ */
+int peer_flush(struct request *req)
+{
+	return sys->gateway_only ? SD_RES_SUCCESS : sd_store->flush();
+}
+
static struct sd_op_template sd_ops[] = {
/* cluster operations */
@@ -1179,6 +1200,17 @@ static struct sd_op_template sd_ops[] = {
.type = SD_OP_TYPE_LOCAL,
.process_main = local_info_recover,
},
+
+ [SD_OP_FLUSH_PEER] = {
+ .name = "FLUSH_PEER",
+ .type = SD_OP_TYPE_PEER,
+ .process_work = peer_flush,
+ },
+ [SD_OP_SYNC_VDI] = {
+ .name = "SYNC_VDI",
+ .type = SD_OP_TYPE_GATEWAY,
+ .process_work = gateway_sync_vdi,
+ },
};
struct sd_op_template *get_sd_op(uint8_t opcode)
@@ -1264,6 +1296,7 @@ static int map_table[] = {
[SD_OP_READ_OBJ] = SD_OP_READ_PEER,
[SD_OP_WRITE_OBJ] = SD_OP_WRITE_PEER,
[SD_OP_REMOVE_OBJ] = SD_OP_REMOVE_PEER,
+ [SD_OP_SYNC_VDI] = SD_OP_FLUSH_PEER,
};
int gateway_to_peer_opcode(int opcode)
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 036812d..cd41ed0 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -130,6 +130,8 @@ int default_write(uint64_t oid, struct siocb *iocb, int create)
}
get_obj_path(oid, path);
+ if (iocb->flags & SD_FLAG_CMD_CACHE && sys->store_writeback)
+ flags &= ~O_DSYNC;
fd = open(path, flags, def_fmode);
if (fd < 0)
return err_to_sderr(oid, errno);
@@ -436,9 +438,6 @@ int default_flush(void)
{
int fd;
- if (sys->gateway_only)
- return SD_RES_SUCCESS;
-
fd = open(obj_path, O_RDONLY);
if (fd < 0) {
eprintf("error at open() %s, %s\n", obj_path, strerror(errno));
diff --git a/sheep/sheep.c b/sheep/sheep.c
index e1434cf..f960faf 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -49,9 +49,9 @@ static struct option const long_options[] = {
{"stdout", no_argument, NULL, 'o'},
{"port", required_argument, NULL, 'p'},
{"disk-space", required_argument, NULL, 's'},
- {"enable-cache", required_argument, NULL, 'w'},
{"zone", required_argument, NULL, 'z'},
{"pidfile", required_argument, NULL, 'P'},
+ {"cache", required_argument, NULL, 'w'},
{NULL, 0, NULL, 0},
};
@@ -78,9 +78,9 @@ Options:\n\
-p, --port specify the TCP port on which to listen\n\
-P, --pidfile create a pid file\n\
-s, --disk-space specify the free disk space in megabytes\n\
- -w, --enable-cache enable object cache and specify the max size (M) and mode\n\
-y, --myaddr specify the address advertised to other sheep\n\
-z, --zone specify the zone id\n\
+ -w, --cache specify the cache type\n\
", PACKAGE_VERSION, program_name);
exit(status);
}
@@ -178,6 +178,77 @@ static int init_signal(void)
static struct cluster_info __sys;
struct cluster_info *sys = &__sys;
+/*
+ * Parse an "object:<size>" cache option and enable the object cache.
+ *
+ * <size> is the maximum cache size in megabytes.  On success
+ * sys->enable_write_cache is set and sys->cache_size receives the size
+ * converted to bytes.  If the option is malformed, has trailing
+ * characters after the number, or the size is out of range, an error is
+ * printed to stderr and the process exits with status 1.
+ */
+static void object_cache_set(char *s)
+{
+	const char *header = "object:";
+	const int64_t mb = 1024 * 1024;
+	/* largest megabyte count whose byte count still fits in int64_t */
+	const int64_t max_size = INT64_MAX / mb;
+	size_t len = strlen(header);
+	char *size, *p;
+	long long cache_size;
+
+	if (strncmp(s, header, len))
+		goto err;
+
+	size = s + len;
+	/*
+	 * strtoll() keeps the full 64-bit range even where long is 32 bits.
+	 * It saturates at LLONG_MIN/LLONG_MAX on overflow, so the range
+	 * check below also rejects values it could not represent.
+	 */
+	cache_size = strtoll(size, &p, 10);
+	if (size == p || *p != '\0' || cache_size < 0 || cache_size > max_size)
+		goto err;
+
+	sys->enable_write_cache = 1;
+	/* cannot overflow: cache_size <= INT64_MAX / mb */
+	sys->cache_size = cache_size * mb;
+
+	return;
+err:
+	fprintf(stderr, "Invalid object cache option '%s': "
+		"size must be an integer between 0 and %" PRId64 "\n",
+		s, max_size);
+	exit(1);
+}
+
+/*
+ * Handle the "disk" cache option: turn on sys->store_writeback.
+ *
+ * Any string other than exactly "disk" is reported on stderr and
+ * terminates the process with status 1.
+ */
+static void disk_cache_set(char *s)
+{
+	if (strcmp(s, "disk") == 0) {
+		sys->store_writeback = 1;
+		return;
+	}
+
+	fprintf(stderr, "invalid disk cache option: %s\n", s);
+	exit(1);
+}
+
+/*
+ * Dispatch a single cache mode token to its handler.
+ *
+ * The token is matched by prefix against the known mode names, in table
+ * order, and the whole token is handed to the first matching handler.
+ * A token that matches no known mode is reported on stderr and
+ * terminates the process with status 1.
+ */
+static void do_cache_mode(char *s)
+{
+	struct cache_mode {
+		const char *name;
+		void (*set)(char *);
+	};
+
+	/* terminated by an entry whose name is NULL */
+	struct cache_mode modes[] = {
+		{ "object", object_cache_set },
+		{ "disk", disk_cache_set },
+		{ NULL, NULL },
+	};
+	struct cache_mode *m;
+
+	for (m = modes; m->name; m++) {
+		if (strncmp(s, m->name, strlen(m->name)) != 0)
+			continue;
+
+		m->set(s);
+		return;
+	}
+
+	fprintf(stderr, "invalid cache mode: %s\n", s);
+	exit(1);
+}
+
+/*
+ * Apply every comma-separated cache mode in @mode (e.g. "disk,object:50").
+ *
+ * @mode is tokenized in place by strtok().  An argument that contains no
+ * mode at all (an empty string, or only commas) makes strtok() return
+ * NULL on the first call; that case is rejected here with an error on
+ * stderr and exit status 1, so do_cache_mode() is never handed a NULL
+ * string.
+ */
+static void init_cache_mode(char *mode)
+{
+	char *s = strtok(mode, ",");
+
+	if (!s) {
+		fprintf(stderr, "invalid cache mode: %s\n", mode);
+		exit(1);
+	}
+
+	for (; s; s = strtok(NULL, ","))
+		do_cache_mode(s);
+}
+
int main(int argc, char **argv)
{
int ch, longindex;
@@ -188,14 +259,12 @@ int main(int argc, char **argv)
int log_level = SDOG_INFO;
char path[PATH_MAX];
int64_t zone = -1;
- int64_t cache_size = 0;
int64_t free_space = 0;
int nr_vnodes = SD_DEFAULT_VNODES;
bool explicit_addr = false;
int af;
char *p;
struct cluster_driver *cdrv;
- int enable_object_cache = 0; /* disabled by default */
char *pid_file = NULL;
signal(SIGPIPE, SIG_IGN);
@@ -263,21 +332,6 @@ int main(int argc, char **argv)
}
sys->this_node.zone = zone;
break;
- case 'w':
- enable_object_cache = 1;
- cache_size = strtol(optarg, &p, 10);
- if (optarg == p || cache_size < 0 ||
- UINT64_MAX < cache_size) {
- fprintf(stderr, "Invalid cache size '%s': "
- "must be an integer between 0 and %lu\n",
- optarg, UINT64_MAX);
- exit(1);
- }
- sys->cache_size = cache_size * 1024 * 1024;
-
- fprintf(stdout, "enable write cache, "
- "max cache size %" PRIu64 "M\n", cache_size);
- break;
case 's':
free_space = strtoll(optarg, &p, 10);
if (optarg == p || free_space <= 0 ||
@@ -303,6 +357,9 @@ int main(int argc, char **argv)
sys->cdrv_option = get_cdrv_option(sys->cdrv, optarg);
break;
+ case 'w':
+ init_cache_mode(optarg);
+ break;
case 'h':
usage(0);
break;
@@ -334,7 +391,7 @@ int main(int argc, char **argv)
if (ret)
exit(1);
- ret = init_store(dir, enable_object_cache);
+ ret = init_store(dir);
if (ret)
exit(1);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ae9ef66..72a8b42 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -119,6 +119,8 @@ struct cluster_info {
struct work_queue *block_wqueue;
struct work_queue *sockfd_wqueue;
struct work_queue *reclaim_wqueue;
+
+ int store_writeback;
};
struct siocb {
@@ -212,7 +214,7 @@ static inline uint32_t sys_epoch(void)
int create_listen_port(int port, void *data);
-int init_store(const char *dir, int enable_write_cache);
+int init_store(const char *dir);
int init_base_path(const char *dir);
int fill_vdi_copy_list(void *data);
@@ -354,12 +356,15 @@ int gateway_read_obj(struct request *req);
int gateway_write_obj(struct request *req);
int gateway_create_and_write_obj(struct request *req);
int gateway_remove_obj(struct request *req);
+int gateway_sync_vdi(struct request *req);
/* backend store */
int peer_read_obj(struct request *req);
int peer_write_obj(struct request *req);
int peer_create_and_write_obj(struct request *req);
int peer_remove_obj(struct request *req);
+int peer_flush(struct request *req);
+
int default_flush(void);
/* object_cache */
diff --git a/sheep/store.c b/sheep/store.c
index 8326156..e1f1abe 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -480,7 +480,7 @@ out:
return ret;
}
-int init_store(const char *d, int enable_write_cache)
+int init_store(const char *d)
{
int ret;
@@ -514,8 +514,7 @@ int init_store(const char *d, int enable_write_cache)
return ret;
}
- if (enable_write_cache) {
- sys->enable_write_cache = 1;
+ if (sys->enable_write_cache) {
ret = object_cache_init(d);
if (ret)
return 1;
--
1.7.5.1
More information about the sheepdog
mailing list