[sheepdog] [PATCH 1/2] sheep: change cache semantics back

Liu Yuan namei.unix at gmail.com
Wed Sep 19 09:36:06 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

After commit 91e6884, we changed the meaning of 'no SD_FLAG_CMD_CACHE flag'
to 'writethrough semantics' instead of 'cache not enabled'. This causes
trouble for collie commands when sheep is started with object cache enabled:

All collie commands will try to operate on the object cache, pulling
objects into the local node while executing commands.

This causes a lot of trouble, such as vdi data inconsistency, wasted disk
space and bandwidth, and so on.

This patch restores the old semantics, so the current collie code does not
need a single change.

With this patch,
'-w object:mode={writethrough,writeback},size={size},directio' operates on
the object cache, and
'-w disk' enables the current disk writeback semantics.

This will fix the failure of tests/044.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/object_cache.c | 83 +++++++++++++++++++++++++++++++++++++++-------------
 sheep/request.c      |  2 +-
 sheep/sheep.c        | 16 ++++++++++
 sheep/sheep_priv.h   |  1 +
 4 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 959ca15..9549af8 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -75,6 +75,9 @@ struct object_cache {
 	struct rb_root object_tree;
 
 	pthread_rwlock_t lock;
+
+	int (*read)(struct object_cache_entry *, void *, size_t, off_t);
+	int (*write)(struct object_cache_entry *, void *, size_t, off_t, int);
 };
 
 static struct global_cache sys_cache;
@@ -370,6 +373,19 @@ out:
 	return ret;
 }
 
+static int write_cache_object(struct object_cache_entry *entry, void *buf,
+			      size_t count, off_t offset, int create)
+{
+	uint32_t vid = entry->oc->vid, idx = entry_idx(entry);
+	int ret;
+
+	ret = write_cache_object_noupdate(vid, idx, buf, count, offset);
+
+	if (ret == SD_RES_SUCCESS)
+		update_cache_entry(entry, idx, count, offset, 1);
+	return ret;
+}
+
 static int read_cache_object(struct object_cache_entry *entry, void *buf,
 			     size_t count, off_t offset)
 {
@@ -383,9 +399,9 @@ static int read_cache_object(struct object_cache_entry *entry, void *buf,
 	return ret;
 }
 
-static int write_cache_object(struct object_cache_entry *entry, void *buf,
-			      size_t count, off_t offset, int create,
-			      bool writeback)
+static int write_and_push_cache_object(struct object_cache_entry *entry,
+				       void *buf, size_t count, off_t offset,
+				       int create)
 {
 	uint32_t vid = entry->oc->vid, idx = entry_idx(entry);
 	uint64_t oid = idx_to_oid(vid, idx);
@@ -397,9 +413,6 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
-	if (writeback)
-		goto out;
-
 	if (create)
 		sd_init_req(&hdr, SD_OP_CREATE_AND_WRITE_OBJ);
 	else
@@ -415,8 +428,8 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
 		eprintf("failed to write object %" PRIx64 ", %x\n", oid, ret);
 		return ret;
 	}
-out:
-	update_cache_entry(entry, idx, count, offset, writeback);
+
+	update_cache_entry(entry, idx, count, offset, 0);
 	return ret;
 }
 
@@ -607,6 +620,12 @@ not_found:
 
 		pthread_rwlock_init(&cache->lock, NULL);
 		hlist_add_head(&cache->hash, head);
+
+		cache->read = read_cache_object;
+		if (sys->object_cache_writeback)
+			cache->write = write_cache_object;
+		else
+			cache->write = write_and_push_cache_object;
 	} else {
 		cache = NULL;
 	}
@@ -674,7 +693,7 @@ static void add_to_object_cache(struct object_cache *oc, uint32_t idx,
 }
 
 static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
-			       int create, bool writeback)
+			       int create)
 {
 	struct strbuf buf;
 	int fd, ret = SD_RES_SUCCESS, flags = def_open_flags;
@@ -712,9 +731,12 @@ static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
 	ret = prealloc(fd, data_length);
 	if (ret != SD_RES_SUCCESS) {
 		ret = SD_RES_EIO;
-	} else
-		add_to_object_cache(oc, idx, writeback);
-
+	} else {
+		if (sys->object_cache_writeback)
+			add_to_object_cache(oc, idx, 1);
+		else
+			add_to_object_cache(oc, idx, 0);
+	}
 	close(fd);
 out:
 	strbuf_release(&buf);
@@ -832,7 +854,7 @@ int object_is_cached(uint64_t oid)
 	if (!cache)
 		return 0;
 
-	return (object_cache_lookup(cache, idx, 0, false) == SD_RES_SUCCESS);
+	return (object_cache_lookup(cache, idx, 0) == SD_RES_SUCCESS);
 }
 
 void object_cache_delete(uint32_t vid)
@@ -940,6 +962,27 @@ int bypass_object_cache(struct request *req)
 {
 	uint64_t oid = req->rq.obj.oid;
 
+	if (!(req->rq.flags & SD_FLAG_CMD_CACHE)) {
+		uint32_t vid = oid_to_vid(oid);
+		struct object_cache *cache;
+
+		cache = find_object_cache(vid, 0);
+		if (!cache)
+			return 1;
+		if (req->rq.flags & SD_FLAG_CMD_WRITE) {
+			object_cache_flush_and_delete(cache);
+			return 1;
+		} else  {
+			/* For read requet, we can read cache if any */
+			uint32_t idx = object_cache_oid_to_idx(oid);
+
+			if (object_cache_lookup(cache, idx, 0) == 0)
+				return 0;
+			else
+				return 1;
+		}
+	}
+
 	/*
 	 * For vmstate && vdi_attr object, we don't do caching
 	 */
@@ -968,8 +1011,7 @@ int object_cache_handle_request(struct request *req)
 		create = 1;
 
 retry:
-	ret = object_cache_lookup(cache, idx, create,
-				  hdr->flags & SD_FLAG_CMD_CACHE);
+	ret = object_cache_lookup(cache, idx, create);
 	if (ret == SD_RES_NO_CACHE) {
 		ret = object_cache_pull(cache, idx);
 		if (ret != SD_RES_SUCCESS)
@@ -990,14 +1032,13 @@ retry:
 	}
 
 	if (hdr->flags & SD_FLAG_CMD_WRITE) {
-		ret = write_cache_object(entry, req->data, hdr->data_length,
-					 hdr->obj.offset, create,
-					 hdr->flags & SD_FLAG_CMD_CACHE);
+		ret = cache->write(entry, req->data, hdr->data_length,
+				   hdr->obj.offset, create);
 		if (ret != SD_RES_SUCCESS)
 			goto err;
 	} else {
-		ret = read_cache_object(entry, req->data, hdr->data_length,
-					hdr->obj.offset);
+		ret = cache->read(entry, req->data, hdr->data_length,
+				  hdr->obj.offset);
 		if (ret != SD_RES_SUCCESS)
 			goto err;
 		req->rp.data_length = hdr->data_length;
@@ -1026,7 +1067,7 @@ int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
 		return SD_RES_NO_CACHE;
 	}
 
-	ret = write_cache_object(entry, data, datalen, offset, create, false);
+	ret = write_cache_object(entry, data, datalen, offset, create);
 
 	put_cache_entry(entry);
 
diff --git a/sheep/request.c b/sheep/request.c
index 0923b8c..4c7a4e7 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -302,7 +302,7 @@ static void queue_gateway_request(struct request *req)
 	 * Even if it doesn't exist in cache, we'll rely on cache layer to pull
 	 * it.
 	 */
-	if (is_object_cache_enabled())
+	if (is_object_cache_enabled() && req->rq.flags & SD_FLAG_CMD_CACHE)
 		goto queue_work;
 
 	if (req->local_oid)
diff --git a/sheep/sheep.c b/sheep/sheep.c
index cdf447d..d43e250 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -212,6 +212,21 @@ err:
 	exit(1);
 }
 
+static void object_cache_mode_set(char *s)
+{
+	const char *header = "mode=";
+	int len = strlen(header);
+	char *mode;
+
+	assert(!strncmp(s, header, len));
+
+	mode = s + len;
+	if (strcmp(mode, "writeback") == 0)
+		sys->object_cache_writeback = 1;
+
+	return;
+}
+
 static void object_cache_directio_set(char *s)
 {
 	assert(!strcmp(s, "directio"));
@@ -231,6 +246,7 @@ static void _object_cache_set(char *s)
 	struct object_cache_arg object_cache_args[] = {
 		{ "size=", object_cache_size_set },
 		{ "directio", object_cache_directio_set },
+		{ "mode=", object_cache_mode_set },
 		{ NULL, NULL },
 	};
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 66f2863..1500f77 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -106,6 +106,7 @@ struct cluster_info {
 
 	uint8_t gateway_only;
 	uint8_t disable_recovery;
+	uint8_t object_cache_writeback;
 
 	struct work_queue *gateway_wqueue;
 	struct work_queue *io_wqueue;
-- 
1.7.12.84.gefa6462




More information about the sheepdog mailing list