From: Liu Yuan <tailai.ly at taobao.com> - change the global store structure to a pointer - use a list to maintain the registered stores - use /obj/.store to remember the backend store persistently - the backend store can now be specified on the command line: collie cluster format -b farm # use the 'farm' store. If no store is specified, sheep currently uses the 'simple' store. If the specified store is not available, collie prints a list of the available stores. Signed-off-by: Liu Yuan <tailai.ly at taobao.com> --- collie/cluster.c | 59 +++++++++++++++++++++++- collie/collie.c | 1 + include/sheep.h | 9 ++- include/sheepdog_proto.h | 1 + sheep/ops.c | 30 ++++++++++++ sheep/sheep_priv.h | 34 +++++++++++++- sheep/simple_store.c | 10 +--- sheep/store.c | 115 +++++++++++++++++++++++++++++++++++---------- 8 files changed, 219 insertions(+), 40 deletions(-) diff --git a/collie/cluster.c b/collie/cluster.c index 6fbda6b..a97ef79 100644 --- a/collie/cluster.c +++ b/collie/cluster.c @@ -20,6 +20,7 @@ struct cluster_cmd_data { int copies; int nohalt; int force; + char argv[10]; } cluster_cmd_data; static void set_nohalt(uint16_t *p) @@ -28,6 +29,53 @@ static void set_nohalt(uint16_t *p) *p |= SD_FLAG_NOHALT; } +static int get_store_index(char *name) +{ + int ret = -1; + if (!strlen(name) || strcmp(name, "simple") == 0) + ret = 0; + return ret; +} + +static int list_store(void) +{ + int fd, ret; + struct sd_req hdr; + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; + unsigned rlen, wlen; + char buf[512] = { 0 }; + + fd = connect_to(sdhost, sdport); + if (fd < 0) + return EXIT_SYSFAIL; + + memset(&hdr, 0, sizeof(hdr)); + + wlen = 0; + rlen = 512; + hdr.opcode = SD_OP_GET_STORE_LIST; + hdr.data_length = rlen; + + ret = exec_req(fd, &hdr, buf, &wlen, &rlen); + close(fd); + + if (ret) { + fprintf(stderr, "Failed to connect\n"); + return EXIT_FAILURE; + } + + if (rsp->result != SD_RES_SUCCESS) { + fprintf(stderr, "Restore failed: %s\n", + sd_strerror(rsp->result)); + return EXIT_FAILURE; + } + + printf("Available stores:\n"); 
printf("---------------------------------------\n"); + printf("%s\n", buf); + return EXIT_SUCCESS; +} + static int cluster_format(int argc, char **argv) { int fd, ret; @@ -35,6 +83,9 @@ static int cluster_format(int argc, char **argv) struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr; unsigned rlen, wlen; struct timeval tv; + uint8_t idx; + + idx = get_store_index(cluster_cmd_data.argv); fd = connect_to(sdhost, sdport); if (fd < 0) @@ -50,6 +101,7 @@ static int cluster_format(int argc, char **argv) set_nohalt(&hdr.flags); hdr.epoch = node_list_version; hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; + hdr.index = idx; rlen = 0; wlen = 0; @@ -64,7 +116,7 @@ static int cluster_format(int argc, char **argv) if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Format failed: %s\n", sd_strerror(rsp->result)); - return EXIT_FAILURE; + return list_store(); } return EXIT_SUCCESS; @@ -237,7 +289,7 @@ static int cluster_recover(int argc, char **argv) static struct subcommand cluster_cmd[] = { {"info", NULL, "aprh", "show cluster information", 0, cluster_info}, - {"format", NULL, "cHaph", "create a Sheepdog store", + {"format", NULL, "bcHaph", "create a Sheepdog store", 0, cluster_format}, {"shutdown", NULL, "aph", "stop Sheepdog", SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown}, @@ -252,6 +304,9 @@ static int cluster_parser(int ch, char *opt) char *p; switch (ch) { + case 'b': + strcpy(cluster_cmd_data.argv, opt); + break; case 'c': copies = strtol(opt, &p, 10); if (opt == p || copies < 1) { diff --git a/collie/collie.c b/collie/collie.c index 19cc9a9..baf7c67 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -40,6 +40,7 @@ static const struct sd_option collie_options[] = { {'d', "delete", 0, "delete a key"}, /* cluster options */ + {'b', "store", 1, "specify backend store"}, {'c', "copies", 1, "specify the data redundancy (number of copies)"}, {'H', "nohalt", 0, "serve IO requests even if there are too few\n\ nodes for the configured redundancy"}, diff --git 
a/include/sheep.h b/include/sheep.h index 906c1f5..6cd63e7 100644 --- a/include/sheep.h +++ b/include/sheep.h @@ -27,7 +27,7 @@ #define SD_MAX_VNODES 65536 #define SD_MAX_VMS 4096 /* FIXME: should be removed */ -#define SD_OP_SHEEP 0x80 +#define SD_OP_SHEEP 0x80 #define SD_OP_DEL_VDI 0x81 #define SD_OP_GET_NODE_LIST 0x82 #define SD_OP_GET_VM_LIST 0x83 @@ -37,7 +37,8 @@ #define SD_OP_STAT_CLUSTER 0x87 #define SD_OP_KILL_NODE 0x88 #define SD_OP_GET_VDI_ATTR 0x89 -#define SD_OP_RECOVER 0x8A +#define SD_OP_RECOVER 0x8a +#define SD_OP_GET_STORE_LIST 0x90 #define SD_FLAG_CMD_IO_LOCAL 0x0010 #define SD_FLAG_CMD_RECOVERY 0x0020 @@ -72,7 +73,8 @@ struct sd_so_req { uint64_t ctime; uint32_t copies; uint32_t tag; - uint32_t opcode_specific[2]; + uint8_t index; + uint8_t opcode_specific[7]; }; struct sd_so_rsp { @@ -263,6 +265,7 @@ static inline const char *sd_strerror(int err) {SD_RES_JOIN_FAILED, "Node has failed to join cluster"}, {SD_RES_HALT, "IO has halted as there are too few living nodes"}, {SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"}, + {SD_RES_NO_STORE, "Targeted backend store is not found"}, {SD_RES_OLD_NODE_VER, "Remote node has an old epoch"}, {SD_RES_NEW_NODE_VER, "Remote node has a new epoch"}, diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index b664223..289502f 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -59,6 +59,7 @@ #define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */ #define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */ #define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover this cluster */ +#define SD_RES_NO_STORE 0x20 /* No targeted backend store */ /* * Object ID rules diff --git a/sheep/ops.c b/sheep/ops.c index 13ecdf2..478d41a 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -12,6 +12,10 @@ #include <stdlib.h> #include "sheep_priv.h" +#include "strbuf.h" + +extern char *obj_path; +extern struct store_driver *sd_store; 
enum sd_op_type { SD_OP_TYPE_CLUSTER = 1, /* cluster operations */ @@ -132,6 +136,11 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp, int i, latest_epoch, ret; uint64_t ctime; + sd_store = find_store_driver(index_to_name(hdr->index)); + if (!sd_store) + return SD_RES_NO_STORE; + + sd_store->init(obj_path); sys->nr_sobjs = hdr->copies; sys->flags = hdr->flags; if (!sys->nr_sobjs) @@ -212,6 +221,21 @@ static int cluster_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp, return ret; } +static int local_get_store_list(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + struct strbuf buf = STRBUF_INIT; + struct store_driver *driver; + + list_for_each_entry(driver, &store_drivers, list) { + strbuf_addf(&buf, "%s ", driver->name); + } + memcpy(data, buf.buf, buf.len); + + strbuf_release(&buf); + return SD_RES_SUCCESS; +} + static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -434,6 +458,12 @@ static struct sd_op_template sd_ops[] = { }, /* local operations */ + [SD_OP_GET_STORE_LIST] = { + .type = SD_OP_TYPE_LOCAL, + .force = 1, + .process_work = local_get_store_list, + }, + [SD_OP_READ_VDIS] = { .type = SD_OP_TYPE_LOCAL, .force = 1, diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 5145f06..bee2cac 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -166,7 +166,8 @@ struct siocb { }; struct store_driver { - const char *driver_name; + struct list_head list; + const char *name; int (*init)(char *path); int (*open)(uint64_t oid, struct siocb *, int create); int (*write)(uint64_t oid, struct siocb *); @@ -178,7 +179,33 @@ struct store_driver { int (*atomic_put)(uint64_t oid, struct siocb *); }; -extern void register_store_driver(struct store_driver *); +extern struct list_head store_drivers; +#define add_store_driver(driver) \ +static void __attribute__((constructor)) add_ ## driver(void) { \ + list_add(&driver.list, &store_drivers); \ +} + +static inline struct store_driver 
*find_store_driver(const char *name) +{ + struct store_driver *driver; + + list_for_each_entry(driver, &store_drivers, list) { + if (strcmp(driver->name, name) == 0) + return driver; + } + return NULL; +} + +static inline const char *index_to_name(int idx) +{ + static const char *stores[] = { + [0] = "simple", + }; + + if (idx < 0 || idx >= ARRAY_SIZE(stores)) + return "INVALID"; + return stores[idx]; +} extern struct cluster_info *sys; @@ -242,6 +269,9 @@ int store_write_obj(const struct sd_req *, struct sd_rsp *, void *); int store_read_obj(const struct sd_req *, struct sd_rsp *, void *); int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *); +int store_file_write(void *buffer, size_t len); +void *store_file_read(void); + #define NR_GW_WORKER_THREAD 4 #define NR_IO_WORKER_THREAD 4 diff --git a/sheep/simple_store.c b/sheep/simple_store.c index ddaa781..a5711c1 100644 --- a/sheep/simple_store.c +++ b/sheep/simple_store.c @@ -239,8 +239,8 @@ out: return ret; } -struct store_driver store = { - .driver_name = "simple", +struct store_driver simple_store = { + .name = "simple", .init = simple_store_init, .open = simple_store_open, .write = simple_store_write, @@ -251,8 +251,4 @@ struct store_driver store = { .atomic_put = simple_store_atomic_put, }; -void register_store_driver(struct store_driver *driver) -{ - store = *driver; - eprintf("Register %s store driver\n", store.driver_name); -} +add_store_driver(simple_store); diff --git a/sheep/store.c b/sheep/store.c index 3323fbc..0025252 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -43,7 +43,8 @@ static char *config_path; static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP; mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; -extern struct store_driver store; +struct store_driver *sd_store; +LIST_HEAD(store_drivers); static int obj_cmp(const void *oid1, const void *oid2) { @@ -59,7 +60,7 @@ static int obj_cmp(const void *oid1, const void *oid2) static void 
get_store_dir(struct strbuf *buf, int epoch) { - if (!strcmp(store.name, "simple")) + if (!strcmp(sd_store->name, "simple")) strbuf_addf(buf, "%s%08u", obj_path, epoch); else /* XXX assume other store doesn't need epoch/obj pattern */ strbuf_addf(buf, "%s", obj_path); @@ -138,7 +139,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d iocb.buf = buf; iocb.length = 0; iocb.epoch = i; - store.get_objlist(&iocb); + sd_store->get_objlist(&iocb); nr = merge_objlist(list, nr, (uint64_t *)iocb.buf, iocb.length); } out: @@ -173,17 +174,17 @@ static int read_copy_from_cluster(struct request *req, uint32_t epoch, if (is_myself(e[n].addr, e[n].port)) { memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; - ret = store.open(oid, &iocb, 0); + ret = sd_store->open(oid, &iocb, 0); if (ret != SD_RES_SUCCESS) continue; iocb.buf = buf; iocb.length = SD_DATA_OBJ_SIZE; iocb.offset = 0; - ret = store.read(oid, &iocb); + ret = sd_store->read(oid, &iocb); if (ret != SD_RES_SUCCESS) continue; - store.close(oid, &iocb); + sd_store->close(oid, &iocb); goto out; } @@ -426,7 +427,7 @@ out: int update_epoch_store(uint32_t epoch) { - if (!strcmp(store.name, "simple")) { + if (!strcmp(sd_store->name, "simple")) { char new[1024]; snprintf(new, sizeof(new), "%s%08u/", obj_path, epoch); @@ -565,21 +566,21 @@ int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data) memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; iocb.flags = hdr->flags; - ret = store.open(hdr->oid, &iocb, 0); + ret = sd_store->open(hdr->oid, &iocb, 0); if (ret != SD_RES_SUCCESS) return ret; iocb.buf = request->data; iocb.length = hdr->data_length; iocb.offset = hdr->offset; - ret = store.read(hdr->oid, &iocb); + ret = sd_store->read(hdr->oid, &iocb); if (ret != SD_RES_SUCCESS) goto out; rsps->data_length = hdr->data_length; rsps->copies = sys->nr_sobjs; out: - store.close(hdr->oid, &iocb); + sd_store->close(hdr->oid, &iocb); return ret; } @@ -604,11 +605,11 @@ static int 
do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epo strbuf_release(&buf); return SD_RES_EIO; } - ret = store.write(oid, iocb); + ret = sd_store->write(oid, iocb); jrnl_end(jd); strbuf_release(&buf); } else - ret = store.write(oid, iocb); + ret = sd_store->write(oid, iocb); return ret; } @@ -624,13 +625,13 @@ int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data) memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; iocb.flags = hdr->flags; - ret = store.open(hdr->oid, &iocb, 0); + ret = sd_store->open(hdr->oid, &iocb, 0); if (ret != SD_RES_SUCCESS) return ret; ret = do_write_obj(&iocb, hdr, epoch, request->data); - store.close(hdr->oid, &iocb); + sd_store->close(hdr->oid, &iocb); return ret; } @@ -651,7 +652,7 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; iocb.flags = hdr->flags; - ret = store.open(hdr->oid, &iocb, 1); + ret = sd_store->open(hdr->oid, &iocb, 1); if (ret != SD_RES_SUCCESS) return ret; if (hdr->flags & SD_FLAG_CMD_COW) { @@ -666,14 +667,14 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi iocb.buf = buf; iocb.length = SD_DATA_OBJ_SIZE; iocb.offset = 0; - ret = store.write(hdr->oid, &iocb); + ret = sd_store->write(hdr->oid, &iocb); if (ret != SD_RES_SUCCESS) goto out; } ret = do_write_obj(&iocb, hdr, epoch, request->data); out: free(buf); - store.close(hdr->oid, &iocb); + sd_store->close(hdr->oid, &iocb); return ret; } @@ -1228,7 +1229,7 @@ static int recover_object_from_replica(uint64_t oid, if (is_myself(entry->addr, entry->port)) { iocb.epoch = epoch; - ret = store.link(oid, &iocb, tgt_epoch); + ret = sd_store->link(oid, &iocb, tgt_epoch); if (ret == SD_RES_SUCCESS) { ret = 0; goto done; @@ -1277,7 +1278,7 @@ static int recover_object_from_replica(uint64_t oid, iocb.epoch = epoch; iocb.length = rlen; iocb.buf = buf; - ret = store.atomic_put(oid, &iocb); + ret = sd_store->atomic_put(oid, 
&iocb); if (ret!= SD_RES_SUCCESS) { ret = -1; goto out; @@ -1410,9 +1411,9 @@ static void recover_object(struct work *work) eprintf("done:%"PRIu32" count:%"PRIu32", oid:%"PRIx64"\n", rw->done, rw->count, oid); iocb.epoch = epoch; - ret = store.open(oid, &iocb, 0); + ret = sd_store->open(oid, &iocb, 0); if (ret == SD_RES_SUCCESS) { - store.close(oid, &iocb); + sd_store->close(oid, &iocb); dprintf("the object is already recovered\n"); return; } @@ -1494,10 +1495,10 @@ int is_recoverying_oid(uint64_t oid) memset(&iocb, 0, sizeof(iocb)); iocb.epoch = sys->epoch; - ret = store.open(oid, &iocb, 0); + ret = sd_store->open(oid, &iocb, 0); if (ret == SD_RES_SUCCESS) { dprintf("the object %" PRIx64 " is already recoverd\n", oid); - store.close(oid, &iocb); + sd_store->close(oid, &iocb); return 0; } @@ -1993,6 +1994,7 @@ static int init_config_path(const char *base_path) int init_store(const char *d) { int ret; + char *driver_name; ret = init_obj_path(d); if (ret) @@ -2014,10 +2016,15 @@ int init_store(const char *d) if (ret) return ret; - ret = store.init(obj_path); - if (ret) - return ret; + driver_name = (char *)store_file_read(); + if (driver_name) + sd_store = find_store_driver(driver_name); + if (sd_store) { + ret = sd_store->init(obj_path); + if (ret != SD_RES_SUCCESS) + return ret; + } return ret; } @@ -2134,3 +2141,59 @@ int get_cluster_flags(uint16_t *flags) out: return ret; } + +void *store_file_read(void) +{ + struct strbuf buf = STRBUF_INIT; + struct stat st; + void *buffer = NULL; + int len, fd; + + strbuf_addf(&buf, "%s%s", obj_path, ".store"); + + fd = open(buf.buf, O_RDONLY); + if (fd < 0) { + dprintf("not found .store file\n"); + goto out; + } + if (fstat(fd, &st) < 0) { + dprintf("%m\n"); + goto out_close; + } + + len = st.st_size; + buffer = xmalloc(len); + len = xread(fd, buffer, len); + if (len != st.st_size) { + free(buffer); + buffer = NULL; + goto out_close; + } +out_close: + close(fd); +out: + strbuf_release(&buf); + return buffer; +} + +int 
store_file_write(void *buffer, size_t len) +{ + int fd, ret = -1; + struct strbuf buf = STRBUF_INIT; + + strbuf_addf(&buf, "%s%s", obj_path, ".store"); + fd = open(buf.buf, O_WRONLY | O_TRUNC | O_CREAT, def_fmode); + if (fd < 0) { + dprintf("%m\n"); + goto out; + } + + ret = xwrite(fd, buffer, len); + if (ret != len) + ret = -1; + + close(fd); +out: + strbuf_release(&buf); + return ret; +} -- 1.7.8.rc3 |