[sheepdog] [PATCH 1/2] sheep: change cache semantics back
Liu Yuan
namei.unix at gmail.com
Wed Sep 19 09:36:06 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
After commit 91e6884, a request without the SD_FLAG_CMD_CACHE flag is treated
as 'writethrough semantics' instead of 'cache not enabled'. This causes
trouble for collie commands when sheep is started with the object cache enabled:
All collie commands will try to operate on the object cache, pulling
objects into the local node while executing.
This causes a lot of trouble, such as vdi data inconsistency, wasted disk
space and bandwidth, and so on.
This patch restores the old semantics, so the current collie code doesn't need
a single change.
With this patch, '-w object:mode={writethrough,writeback},size={size},directio'
operates on the object cache, and
'-w disk' enables the current disk writeback semantics.
This will fix the failure of tests/044.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/object_cache.c | 83 +++++++++++++++++++++++++++++++++++++++-------------
sheep/request.c | 2 +-
sheep/sheep.c | 16 ++++++++++
sheep/sheep_priv.h | 1 +
4 files changed, 80 insertions(+), 22 deletions(-)
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 959ca15..9549af8 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -75,6 +75,9 @@ struct object_cache {
struct rb_root object_tree;
pthread_rwlock_t lock;
+
+ int (*read)(struct object_cache_entry *, void *, size_t, off_t);
+ int (*write)(struct object_cache_entry *, void *, size_t, off_t, int);
};
static struct global_cache sys_cache;
@@ -370,6 +373,19 @@ out:
return ret;
}
+static int write_cache_object(struct object_cache_entry *entry, void *buf,
+ size_t count, off_t offset, int create)
+{
+ uint32_t vid = entry->oc->vid, idx = entry_idx(entry);
+ int ret;
+
+ ret = write_cache_object_noupdate(vid, idx, buf, count, offset);
+
+ if (ret == SD_RES_SUCCESS)
+ update_cache_entry(entry, idx, count, offset, 1);
+ return ret;
+}
+
static int read_cache_object(struct object_cache_entry *entry, void *buf,
size_t count, off_t offset)
{
@@ -383,9 +399,9 @@ static int read_cache_object(struct object_cache_entry *entry, void *buf,
return ret;
}
-static int write_cache_object(struct object_cache_entry *entry, void *buf,
- size_t count, off_t offset, int create,
- bool writeback)
+static int write_and_push_cache_object(struct object_cache_entry *entry,
+ void *buf, size_t count, off_t offset,
+ int create)
{
uint32_t vid = entry->oc->vid, idx = entry_idx(entry);
uint64_t oid = idx_to_oid(vid, idx);
@@ -397,9 +413,6 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
if (ret != SD_RES_SUCCESS)
return ret;
- if (writeback)
- goto out;
-
if (create)
sd_init_req(&hdr, SD_OP_CREATE_AND_WRITE_OBJ);
else
@@ -415,8 +428,8 @@ static int write_cache_object(struct object_cache_entry *entry, void *buf,
eprintf("failed to write object %" PRIx64 ", %x\n", oid, ret);
return ret;
}
-out:
- update_cache_entry(entry, idx, count, offset, writeback);
+
+ update_cache_entry(entry, idx, count, offset, 0);
return ret;
}
@@ -607,6 +620,12 @@ not_found:
pthread_rwlock_init(&cache->lock, NULL);
hlist_add_head(&cache->hash, head);
+
+ cache->read = read_cache_object;
+ if (sys->object_cache_writeback)
+ cache->write = write_cache_object;
+ else
+ cache->write = write_and_push_cache_object;
} else {
cache = NULL;
}
@@ -674,7 +693,7 @@ static void add_to_object_cache(struct object_cache *oc, uint32_t idx,
}
static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
- int create, bool writeback)
+ int create)
{
struct strbuf buf;
int fd, ret = SD_RES_SUCCESS, flags = def_open_flags;
@@ -712,9 +731,12 @@ static int object_cache_lookup(struct object_cache *oc, uint32_t idx,
ret = prealloc(fd, data_length);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_EIO;
- } else
- add_to_object_cache(oc, idx, writeback);
-
+ } else {
+ if (sys->object_cache_writeback)
+ add_to_object_cache(oc, idx, 1);
+ else
+ add_to_object_cache(oc, idx, 0);
+ }
close(fd);
out:
strbuf_release(&buf);
@@ -832,7 +854,7 @@ int object_is_cached(uint64_t oid)
if (!cache)
return 0;
- return (object_cache_lookup(cache, idx, 0, false) == SD_RES_SUCCESS);
+ return (object_cache_lookup(cache, idx, 0) == SD_RES_SUCCESS);
}
void object_cache_delete(uint32_t vid)
@@ -940,6 +962,27 @@ int bypass_object_cache(struct request *req)
{
uint64_t oid = req->rq.obj.oid;
+ if (!(req->rq.flags & SD_FLAG_CMD_CACHE)) {
+ uint32_t vid = oid_to_vid(oid);
+ struct object_cache *cache;
+
+ cache = find_object_cache(vid, 0);
+ if (!cache)
+ return 1;
+ if (req->rq.flags & SD_FLAG_CMD_WRITE) {
+ object_cache_flush_and_delete(cache);
+ return 1;
+ } else {
+ /* For read requet, we can read cache if any */
+ uint32_t idx = object_cache_oid_to_idx(oid);
+
+ if (object_cache_lookup(cache, idx, 0) == 0)
+ return 0;
+ else
+ return 1;
+ }
+ }
+
/*
* For vmstate && vdi_attr object, we don't do caching
*/
@@ -968,8 +1011,7 @@ int object_cache_handle_request(struct request *req)
create = 1;
retry:
- ret = object_cache_lookup(cache, idx, create,
- hdr->flags & SD_FLAG_CMD_CACHE);
+ ret = object_cache_lookup(cache, idx, create);
if (ret == SD_RES_NO_CACHE) {
ret = object_cache_pull(cache, idx);
if (ret != SD_RES_SUCCESS)
@@ -990,14 +1032,13 @@ retry:
}
if (hdr->flags & SD_FLAG_CMD_WRITE) {
- ret = write_cache_object(entry, req->data, hdr->data_length,
- hdr->obj.offset, create,
- hdr->flags & SD_FLAG_CMD_CACHE);
+ ret = cache->write(entry, req->data, hdr->data_length,
+ hdr->obj.offset, create);
if (ret != SD_RES_SUCCESS)
goto err;
} else {
- ret = read_cache_object(entry, req->data, hdr->data_length,
- hdr->obj.offset);
+ ret = cache->read(entry, req->data, hdr->data_length,
+ hdr->obj.offset);
if (ret != SD_RES_SUCCESS)
goto err;
req->rp.data_length = hdr->data_length;
@@ -1026,7 +1067,7 @@ int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
return SD_RES_NO_CACHE;
}
- ret = write_cache_object(entry, data, datalen, offset, create, false);
+ ret = write_cache_object(entry, data, datalen, offset, create);
put_cache_entry(entry);
diff --git a/sheep/request.c b/sheep/request.c
index 0923b8c..4c7a4e7 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -302,7 +302,7 @@ static void queue_gateway_request(struct request *req)
* Even if it doesn't exist in cache, we'll rely on cache layer to pull
* it.
*/
- if (is_object_cache_enabled())
+ if (is_object_cache_enabled() && req->rq.flags & SD_FLAG_CMD_CACHE)
goto queue_work;
if (req->local_oid)
diff --git a/sheep/sheep.c b/sheep/sheep.c
index cdf447d..d43e250 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -212,6 +212,21 @@ err:
exit(1);
}
+static void object_cache_mode_set(char *s)
+{
+ const char *header = "mode=";
+ int len = strlen(header);
+ char *mode;
+
+ assert(!strncmp(s, header, len));
+
+ mode = s + len;
+ if (strcmp(mode, "writeback") == 0)
+ sys->object_cache_writeback = 1;
+
+ return;
+}
+
static void object_cache_directio_set(char *s)
{
assert(!strcmp(s, "directio"));
@@ -231,6 +246,7 @@ static void _object_cache_set(char *s)
struct object_cache_arg object_cache_args[] = {
{ "size=", object_cache_size_set },
{ "directio", object_cache_directio_set },
+ { "mode=", object_cache_mode_set },
{ NULL, NULL },
};
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 66f2863..1500f77 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -106,6 +106,7 @@ struct cluster_info {
uint8_t gateway_only;
uint8_t disable_recovery;
+ uint8_t object_cache_writeback;
struct work_queue *gateway_wqueue;
struct work_queue *io_wqueue;
--
1.7.12.84.gefa6462
More information about the sheepdog
mailing list