[Sheepdog] [PATCH] sheep: disable object cache by default

MORITA Kazutaka morita.kazutaka at gmail.com
Sun May 6 21:37:38 CEST 2012


Though it is important to support a write caching feature in Sheepdog,
I think we should disable it by default for now because there are
still some cache coherency problems which look hard to solve:

 - When we create a CoW object, we call read_copy_from_replica() to
   read the source object.  However, there is no guarantee that the
   object is up to date if another node caches it.

 - Similarly, there is no guarantee that recover_object_from_replica()
   can read the latest object when doing object recovery.

 - Some vdi operations don't care about cached data.

 - Cached data should be synced periodically.  Currently, the data is
   not synced at all until SD_OP_FLUSH_VDI is requested.

Let's make it the default after it becomes stable and mature.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at gmail.com>
---
 sheep/group.c      |    3 ++-
 sheep/ops.c        |    8 ++++++--
 sheep/sdnet.c      |    3 ++-
 sheep/sheep.c      |    7 ++++++-
 sheep/sheep_priv.h |    2 ++
 sheep/store.c      |   11 +++++++----
 6 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index c7fd387..c781a6f 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -1033,7 +1033,8 @@ static void process_request_queue(void)
 			if (copies > req->vnodes->nr_zones)
 				copies = req->vnodes->nr_zones;
 
-			if (!(req->rq.flags & SD_FLAG_CMD_IO_LOCAL) &&
+			if (sys->enable_write_cache &&
+			    !(req->rq.flags & SD_FLAG_CMD_IO_LOCAL) &&
 			    object_is_cached(hdr->oid)) {
 				/* If we have cache of it we are at its service. */
 				list_add_tail(&req->r_wlist, &sys->outstanding_req_list);
diff --git a/sheep/ops.c b/sheep/ops.c
index b6f8eb2..30c5d01 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -162,7 +162,7 @@ static int cluster_del_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 	ret = del_vdi(hdr->epoch, data, hdr->data_length, &vid,
 		      hdr->snapid, &nr_copies);
 
-	if (ret == SD_RES_SUCCESS)
+	if (sys->enable_write_cache && ret == SD_RES_SUCCESS)
 		object_cache_delete(vid);
 	vdi_rsp->vdi_id = vid;
 	vdi_rsp->copies = nr_copies;
@@ -600,8 +600,12 @@ static int local_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *d
 	struct sd_obj_req *hdr = (struct sd_obj_req *)req;
 	uint64_t oid = hdr->oid;
 	uint32_t vid = oid_to_vid(oid);
-	struct object_cache *cache = find_object_cache(vid, 0);
+	struct object_cache *cache;
+
+	if (!sys->enable_write_cache)
+		return SD_RES_SUCCESS;
 
+	cache = find_object_cache(vid, 0);
 	if (cache) {
 		if (!sys->async_flush)
 			return object_cache_push(cache);
diff --git a/sheep/sdnet.c b/sheep/sdnet.c
index f59b1ff..124f7ef 100644
--- a/sheep/sdnet.c
+++ b/sheep/sdnet.c
@@ -214,7 +214,8 @@ static int check_request(struct request *req)
 	 * if we go for a cached object, we don't care if it is busy
 	 * or being recovered.
 	 */
-	if ((hdr->flags & SD_FLAG_CMD_CACHE) && object_is_cached(hdr->oid))
+	if (sys->enable_write_cache && (hdr->flags & SD_FLAG_CMD_CACHE) &&
+	    object_is_cached(hdr->oid))
 		return 0;
 
 	if (!req->local_oid && !req->local_cow_oid)
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 4e6e266..abd7e1b 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -47,11 +47,12 @@ static struct option const long_options[] = {
 	{"stdout", no_argument, NULL, 'o'},
 	{"port", required_argument, NULL, 'p'},
 	{"vnodes", required_argument, NULL, 'v'},
+	{"writecache", no_argument, NULL, 'w'},
 	{"zone", required_argument, NULL, 'z'},
 	{NULL, 0, NULL, 0},
 };
 
-static const char *short_options = "ac:dDfg:Ghi:l:op:v:z:";
+static const char *short_options = "ac:dDfg:Ghi:l:op:v:wz:";
 
 static void usage(int status)
 {
@@ -75,6 +76,7 @@ Options:\n\
   -l, --loglevel          specify the level of logging detail\n\
   -p, --port              specify the TCP port on which to listen\n\
   -v, --vnodes            specify the number of virtual nodes\n\
+  -w, --writecache        enable writecache\n\
   -z, --zone              specify the zone id\n\
 ", PACKAGE_VERSION, program_name);
 	exit(status);
@@ -184,6 +186,9 @@ int main(int argc, char **argv)
 			}
 			sys->this_node.zone = zone;
 			break;
+		case 'w':
+			sys->enable_write_cache = 1;
+			break;
 		case 'v':
 			nr_vnodes = strtol(optarg, &p, 10);
 			if (optarg == p || nr_vnodes < 0 || SD_MAX_VNODES < nr_vnodes) {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 2275a93..fa991ef 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -107,6 +107,8 @@ struct cluster_info {
 	struct cluster_driver *cdrv;
 	const char *cdrv_option;
 
+	int enable_write_cache;
+
 	/* set after finishing the JOIN procedure */
 	int join_finished;
 	struct sd_node this_node;
diff --git a/sheep/store.c b/sheep/store.c
index f8bf404..a0d973a 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -424,7 +424,7 @@ void do_io_request(struct work *work)
 	if (hdr->flags & SD_FLAG_CMD_IO_LOCAL) {
 		ret = do_local_io(req, epoch);
 	} else {
-		if (bypass_object_cache(hdr)) {
+		if (!sys->enable_write_cache || bypass_object_cache(hdr)) {
 			/* fix object consistency when we read the object for the first time */
 			if (req->check_consistency) {
 				ret = fix_object_consistency(req);
@@ -842,9 +842,12 @@ int init_store(const char *d)
 	if (ret)
 		return ret;
 
-	ret = object_cache_init(d);
-	if (ret)
-		return 1;
+	if (sys->enable_write_cache) {
+		ret = object_cache_init(d);
+		if (ret)
+			return 1;
+	}
+
 	return ret;
 }
 
-- 
1.7.2.5




More information about the sheepdog mailing list