Though it is important to support a write caching feature in Sheepdog, I think we should disable it by default for now because there are still some cache coherency problems which look hard to solve: - When we create a CoW object, we call read_copy_from_replica() to read the source object. However, there is no guarantee that the object is up to date if another node caches it. - Similarly, there is no guarantee that recover_object_from_replica() can read the latest object when doing object recovery. - Some VDI operations don't take cached data into account. - Cached data should be synced periodically. Currently, the data is not synced at all until SD_OP_FLUSH_VDI is requested. Let's make it the default after it becomes stable and mature. Signed-off-by: MORITA Kazutaka <morita.kazutaka at gmail.com> --- sheep/group.c | 3 ++- sheep/ops.c | 8 ++++++-- sheep/sdnet.c | 3 ++- sheep/sheep.c | 7 ++++++- sheep/sheep_priv.h | 2 ++ sheep/store.c | 11 +++++++---- 6 files changed, 25 insertions(+), 9 deletions(-) diff --git a/sheep/group.c b/sheep/group.c index c7fd387..c781a6f 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -1033,7 +1033,8 @@ static void process_request_queue(void) if (copies > req->vnodes->nr_zones) copies = req->vnodes->nr_zones; - if (!(req->rq.flags & SD_FLAG_CMD_IO_LOCAL) && + if (sys->enable_write_cache && + !(req->rq.flags & SD_FLAG_CMD_IO_LOCAL) && object_is_cached(hdr->oid)) { /* If we have cache of it we are at its service. 
*/ list_add_tail(&req->r_wlist, &sys->outstanding_req_list); diff --git a/sheep/ops.c b/sheep/ops.c index b6f8eb2..30c5d01 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -162,7 +162,7 @@ static int cluster_del_vdi(const struct sd_req *req, struct sd_rsp *rsp, ret = del_vdi(hdr->epoch, data, hdr->data_length, &vid, hdr->snapid, &nr_copies); - if (ret == SD_RES_SUCCESS) + if (sys->enable_write_cache && ret == SD_RES_SUCCESS) object_cache_delete(vid); vdi_rsp->vdi_id = vid; vdi_rsp->copies = nr_copies; @@ -600,8 +600,12 @@ static int local_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *d struct sd_obj_req *hdr = (struct sd_obj_req *)req; uint64_t oid = hdr->oid; uint32_t vid = oid_to_vid(oid); - struct object_cache *cache = find_object_cache(vid, 0); + struct object_cache *cache; + + if (!sys->enable_write_cache) + return SD_RES_SUCCESS; + cache = find_object_cache(vid, 0); if (cache) { if (!sys->async_flush) return object_cache_push(cache); diff --git a/sheep/sdnet.c b/sheep/sdnet.c index f59b1ff..124f7ef 100644 --- a/sheep/sdnet.c +++ b/sheep/sdnet.c @@ -214,7 +214,8 @@ static int check_request(struct request *req) * if we go for a cached object, we don't care if it is busy * or being recovered. 
*/ - if ((hdr->flags & SD_FLAG_CMD_CACHE) && object_is_cached(hdr->oid)) + if (sys->enable_write_cache && (hdr->flags & SD_FLAG_CMD_CACHE) && + object_is_cached(hdr->oid)) return 0; if (!req->local_oid && !req->local_cow_oid) diff --git a/sheep/sheep.c b/sheep/sheep.c index 4e6e266..abd7e1b 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -47,11 +47,12 @@ static struct option const long_options[] = { {"stdout", no_argument, NULL, 'o'}, {"port", required_argument, NULL, 'p'}, {"vnodes", required_argument, NULL, 'v'}, + {"writecache", no_argument, NULL, 'w'}, {"zone", required_argument, NULL, 'z'}, {NULL, 0, NULL, 0}, }; -static const char *short_options = "ac:dDfg:Ghi:l:op:v:z:"; +static const char *short_options = "ac:dDfg:Ghi:l:op:v:wz:"; static void usage(int status) { @@ -75,6 +76,7 @@ Options:\n\ -l, --loglevel specify the level of logging detail\n\ -p, --port specify the TCP port on which to listen\n\ -v, --vnodes specify the number of virtual nodes\n\ + -w, --writecache enable writecache\n\ -z, --zone specify the zone id\n\ ", PACKAGE_VERSION, program_name); exit(status); @@ -184,6 +186,9 @@ int main(int argc, char **argv) } sys->this_node.zone = zone; break; + case 'w': + sys->enable_write_cache = 1; + break; case 'v': nr_vnodes = strtol(optarg, &p, 10); if (optarg == p || nr_vnodes < 0 || SD_MAX_VNODES < nr_vnodes) { diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 2275a93..fa991ef 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -107,6 +107,8 @@ struct cluster_info { struct cluster_driver *cdrv; const char *cdrv_option; + int enable_write_cache; + /* set after finishing the JOIN procedure */ int join_finished; struct sd_node this_node; diff --git a/sheep/store.c b/sheep/store.c index f8bf404..a0d973a 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -424,7 +424,7 @@ void do_io_request(struct work *work) if (hdr->flags & SD_FLAG_CMD_IO_LOCAL) { ret = do_local_io(req, epoch); } else { - if (bypass_object_cache(hdr)) { + if 
(!sys->enable_write_cache || bypass_object_cache(hdr)) { /* fix object consistency when we read the object for the first time */ if (req->check_consistency) { ret = fix_object_consistency(req); @@ -842,9 +842,12 @@ int init_store(const char *d) if (ret) return ret; - ret = object_cache_init(d); - if (ret) - return 1; + if (sys->enable_write_cache) { + ret = object_cache_init(d); + if (ret) + return 1; + } + return ret; } -- 1.7.2.5 |