From: Liu Yuan <tailai.ly at taobao.com> - change the global store structure to a pointer. - use a list to maintain the registered stores. - use /obj/.store to remember the backend store persistently. - the backend store can now be specified on the command line, e.g. `collie cluster format -b farm` (use the farm store). If no store is specified, the 'simple' store is currently used. If the specified store is not available, collie prints a list of the available stores. Signed-off-by: Liu Yuan <tailai.ly at taobao.com> --- collie/cluster.c | 62 +++++++++++++++++++++++-- collie/collie.c | 1 + include/sheep.h | 6 ++- include/sheepdog_proto.h | 3 + sheep/ops.c | 30 ++++++++++++ sheep/sheep_priv.h | 25 +++++++++- sheep/simple_store.c | 10 +--- sheep/store.c | 115 +++++++++++++++++++++++++++++++++++----------- 8 files changed, 209 insertions(+), 43 deletions(-) diff --git a/collie/cluster.c b/collie/cluster.c index 6fbda6b..6b9cd4b 100644 --- a/collie/cluster.c +++ b/collie/cluster.c @@ -20,14 +20,56 @@ struct cluster_cmd_data { int copies; int nohalt; int force; + char name[STORE_LEN]; } cluster_cmd_data; +#define DEFAULT_STORE "simple" + static void set_nohalt(uint16_t *p) { if (p) *p |= SD_FLAG_NOHALT; } +static int list_store(void) +{ + int fd, ret; + struct sd_req hdr; + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; + unsigned rlen, wlen; + char buf[512] = { 0 }; + + fd = connect_to(sdhost, sdport); + if (fd < 0) + return EXIT_SYSFAIL; + + memset(&hdr, 0, sizeof(hdr)); + + wlen = 0; + rlen = 512; + hdr.opcode = SD_OP_GET_STORE_LIST; + hdr.data_length = rlen; + + ret = exec_req(fd, &hdr, buf, &wlen, &rlen); + close(fd); + + if (ret) { + fprintf(stderr, "Failed to connect\n"); + return EXIT_SYSFAIL; + } + + if (rsp->result != SD_RES_SUCCESS) { + fprintf(stderr, "Restore failed: %s\n", + sd_strerror(rsp->result)); + return EXIT_FAILURE; + } + + printf("Available stores:\n"); + printf("---------------------------------------\n"); + printf("%s\n", buf); + return EXIT_SYSFAIL; +} + static int cluster_format(int argc, char 
**argv) { int fd, ret; @@ -35,6 +77,7 @@ static int cluster_format(int argc, char **argv) struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr; unsigned rlen, wlen; struct timeval tv; + char store_name[STORE_LEN]; fd = connect_to(sdhost, sdport); if (fd < 0) @@ -51,9 +94,15 @@ static int cluster_format(int argc, char **argv) hdr.epoch = node_list_version; hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; - rlen = 0; - wlen = 0; - ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen); + if (strlen(cluster_cmd_data.name)) + strncpy(store_name, cluster_cmd_data.name, STORE_LEN); + else + strcpy(store_name, DEFAULT_STORE); + hdr.data_length = wlen = strlen(store_name) + 1; + hdr.flags = SD_FLAG_CMD_WRITE; + + printf("using backend %s store\n", store_name); + ret = exec_req(fd, (struct sd_req *)&hdr, store_name, &wlen, &rlen); close(fd); if (ret) { @@ -64,7 +113,7 @@ static int cluster_format(int argc, char **argv) if (rsp->result != SD_RES_SUCCESS) { fprintf(stderr, "Format failed: %s\n", sd_strerror(rsp->result)); - return EXIT_FAILURE; + return list_store(); } return EXIT_SUCCESS; @@ -237,7 +286,7 @@ static int cluster_recover(int argc, char **argv) static struct subcommand cluster_cmd[] = { {"info", NULL, "aprh", "show cluster information", 0, cluster_info}, - {"format", NULL, "cHaph", "create a Sheepdog store", + {"format", NULL, "bcHaph", "create a Sheepdog store", 0, cluster_format}, {"shutdown", NULL, "aph", "stop Sheepdog", SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown}, @@ -252,6 +301,9 @@ static int cluster_parser(int ch, char *opt) char *p; switch (ch) { + case 'b': + strncpy(cluster_cmd_data.name, opt, 10); + break; case 'c': copies = strtol(opt, &p, 10); if (opt == p || copies < 1) { diff --git a/collie/collie.c b/collie/collie.c index 19cc9a9..baf7c67 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -40,6 +40,7 @@ static const struct sd_option collie_options[] = { {'d', "delete", 0, "delete a key"}, /* cluster options */ + {'b', "store", 
1, "specify backend store"}, {'c', "copies", 1, "specify the data redundancy (number of copies)"}, {'H', "nohalt", 0, "serve IO requests even if there are too few\n\ nodes for the configured redundancy"}, diff --git a/include/sheep.h b/include/sheep.h index 906c1f5..bbca7f8 100644 --- a/include/sheep.h +++ b/include/sheep.h @@ -27,7 +27,7 @@ #define SD_MAX_VNODES 65536 #define SD_MAX_VMS 4096 /* FIXME: should be removed */ -#define SD_OP_SHEEP 0x80 +#define SD_OP_SHEEP 0x80 #define SD_OP_DEL_VDI 0x81 #define SD_OP_GET_NODE_LIST 0x82 #define SD_OP_GET_VM_LIST 0x83 @@ -37,7 +37,8 @@ #define SD_OP_STAT_CLUSTER 0x87 #define SD_OP_KILL_NODE 0x88 #define SD_OP_GET_VDI_ATTR 0x89 -#define SD_OP_RECOVER 0x8A +#define SD_OP_RECOVER 0x8a +#define SD_OP_GET_STORE_LIST 0x90 #define SD_FLAG_CMD_IO_LOCAL 0x0010 #define SD_FLAG_CMD_RECOVERY 0x0020 @@ -263,6 +264,7 @@ static inline const char *sd_strerror(int err) {SD_RES_JOIN_FAILED, "Node has failed to join cluster"}, {SD_RES_HALT, "IO has halted as there are too few living nodes"}, {SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"}, + {SD_RES_NO_STORE, "Targeted backend store is not found"}, {SD_RES_OLD_NODE_VER, "Remote node has an old epoch"}, {SD_RES_NEW_NODE_VER, "Remote node has a new epoch"}, diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index b664223..de326ee 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -59,6 +59,7 @@ #define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */ #define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */ #define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover this cluster */ +#define SD_RES_NO_STORE 0x20 /* No targeted backend store */ /* * Object ID rules @@ -91,6 +92,8 @@ #define SD_ATTR_OBJ_SIZE (sizeof(struct sheepdog_vdi_attr)) #define CURRENT_VDI_ID 0 +#define STORE_LEN 16 + struct sd_req { uint8_t proto_ver; uint8_t opcode; diff --git a/sheep/ops.c b/sheep/ops.c index 
13ecdf2..3f65477 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -12,6 +12,10 @@ #include <stdlib.h> #include "sheep_priv.h" +#include "strbuf.h" + +extern char *obj_path; +extern struct store_driver *sd_store; enum sd_op_type { SD_OP_TYPE_CLUSTER = 1, /* cluster operations */ @@ -132,6 +136,11 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp, int i, latest_epoch, ret; uint64_t ctime; + sd_store = find_store_driver(data); + if (!sd_store) + return SD_RES_NO_STORE; + + sd_store->init(obj_path); sys->nr_sobjs = hdr->copies; sys->flags = hdr->flags; if (!sys->nr_sobjs) @@ -212,6 +221,21 @@ static int cluster_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp, return ret; } +static int local_get_store_list(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + struct strbuf buf = STRBUF_INIT; + struct store_driver *driver; + + list_for_each_entry(driver, &store_drivers, list) { + strbuf_addf(&buf, "%s ", driver->name); + } + memcpy(data, buf.buf, buf.len); + + strbuf_release(&buf); + return SD_RES_SUCCESS; +} + static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -434,6 +458,12 @@ static struct sd_op_template sd_ops[] = { }, /* local operations */ + [SD_OP_GET_STORE_LIST] = { + .type = SD_OP_TYPE_LOCAL, + .force = 1, + .process_work = local_get_store_list, + }, + [SD_OP_READ_VDIS] = { .type = SD_OP_TYPE_LOCAL, .force = 1, diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 5145f06..0a25c7d 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -166,7 +166,8 @@ struct siocb { }; struct store_driver { - const char *driver_name; + struct list_head list; + const char *name; int (*init)(char *path); int (*open)(uint64_t oid, struct siocb *, int create); int (*write)(uint64_t oid, struct siocb *); @@ -178,7 +179,22 @@ struct store_driver { int (*atomic_put)(uint64_t oid, struct siocb *); }; -extern void register_store_driver(struct store_driver *); +extern struct list_head 
store_drivers; +#define add_store_driver(driver) \ +static void __attribute__((constructor)) add_ ## driver(void) { \ + list_add(&driver.list, &store_drivers); \ +} + +static inline struct store_driver *find_store_driver(const char *name) +{ + struct store_driver *driver; + + list_for_each_entry(driver, &store_drivers, list) { + if (strcmp(driver->name, name) == 0) + return driver; + } + return NULL; +} extern struct cluster_info *sys; @@ -236,12 +252,17 @@ int set_cluster_copies(uint8_t copies); int get_cluster_copies(uint8_t *copies); int set_cluster_flags(uint16_t flags); int get_cluster_flags(uint16_t *flags); +int set_cluster_store(const uint8_t *name); +int get_cluster_store(uint8_t *buf); int store_create_and_write_obj(const struct sd_req *, struct sd_rsp *, void *); int store_write_obj(const struct sd_req *, struct sd_rsp *, void *); int store_read_obj(const struct sd_req *, struct sd_rsp *, void *); int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *); +int store_file_write(void *buffer, size_t len); +void *store_file_read(void); + #define NR_GW_WORKER_THREAD 4 #define NR_IO_WORKER_THREAD 4 diff --git a/sheep/simple_store.c b/sheep/simple_store.c index ddaa781..a5711c1 100644 --- a/sheep/simple_store.c +++ b/sheep/simple_store.c @@ -239,8 +239,8 @@ out: return ret; } -struct store_driver store = { - .driver_name = "simple", +struct store_driver simple_store = { + .name = "simple", .init = simple_store_init, .open = simple_store_open, .write = simple_store_write, @@ -251,8 +251,4 @@ struct store_driver store = { .atomic_put = simple_store_atomic_put, }; -void register_store_driver(struct store_driver *driver) -{ - store = *driver; - eprintf("Register %s store driver\n", store.driver_name); -} +add_store_driver(simple_store); diff --git a/sheep/store.c b/sheep/store.c index 3323fbc..fa2849e 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -31,7 +31,7 @@ struct sheepdog_config { uint64_t ctime; uint16_t flags; uint8_t copies; - uint8_t 
pad[3]; + uint8_t store[STORE_LEN]; }; char *obj_path; @@ -43,7 +43,8 @@ static char *config_path; static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP; mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; -extern struct store_driver store; +struct store_driver *sd_store; +LIST_HEAD(store_drivers); static int obj_cmp(const void *oid1, const void *oid2) { @@ -59,7 +60,7 @@ static int obj_cmp(const void *oid1, const void *oid2) static void get_store_dir(struct strbuf *buf, int epoch) { - if (!strcmp(store.name, "simple")) + if (!strcmp(sd_store->name, "simple")) strbuf_addf(buf, "%s%08u", obj_path, epoch); else /* XXX assume other store doesn't need epoch/obj pattern */ strbuf_addf(buf, "%s", obj_path); @@ -138,7 +139,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d iocb.buf = buf; iocb.length = 0; iocb.epoch = i; - store.get_objlist(&iocb); + sd_store->get_objlist(&iocb); nr = merge_objlist(list, nr, (uint64_t *)iocb.buf, iocb.length); } out: @@ -173,17 +174,17 @@ static int read_copy_from_cluster(struct request *req, uint32_t epoch, if (is_myself(e[n].addr, e[n].port)) { memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; - ret = store.open(oid, &iocb, 0); + ret = sd_store->open(oid, &iocb, 0); if (ret != SD_RES_SUCCESS) continue; iocb.buf = buf; iocb.length = SD_DATA_OBJ_SIZE; iocb.offset = 0; - ret = store.read(oid, &iocb); + ret = sd_store->read(oid, &iocb); if (ret != SD_RES_SUCCESS) continue; - store.close(oid, &iocb); + sd_store->close(oid, &iocb); goto out; } @@ -426,7 +427,7 @@ out: int update_epoch_store(uint32_t epoch) { - if (!strcmp(store.name, "simple")) { + if (!strcmp(sd_store->name, "simple")) { char new[1024]; snprintf(new, sizeof(new), "%s%08u/", obj_path, epoch); @@ -565,21 +566,21 @@ int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data) memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; iocb.flags = hdr->flags; - ret = store.open(hdr->oid, &iocb, 0); 
+ ret = sd_store->open(hdr->oid, &iocb, 0); if (ret != SD_RES_SUCCESS) return ret; iocb.buf = request->data; iocb.length = hdr->data_length; iocb.offset = hdr->offset; - ret = store.read(hdr->oid, &iocb); + ret = sd_store->read(hdr->oid, &iocb); if (ret != SD_RES_SUCCESS) goto out; rsps->data_length = hdr->data_length; rsps->copies = sys->nr_sobjs; out: - store.close(hdr->oid, &iocb); + sd_store->close(hdr->oid, &iocb); return ret; } @@ -604,11 +605,11 @@ static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epo strbuf_release(&buf); return SD_RES_EIO; } - ret = store.write(oid, iocb); + ret = sd_store->write(oid, iocb); jrnl_end(jd); strbuf_release(&buf); } else - ret = store.write(oid, iocb); + ret = sd_store->write(oid, iocb); return ret; } @@ -624,13 +625,13 @@ int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data) memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; iocb.flags = hdr->flags; - ret = store.open(hdr->oid, &iocb, 0); + ret = sd_store->open(hdr->oid, &iocb, 0); if (ret != SD_RES_SUCCESS) return ret; ret = do_write_obj(&iocb, hdr, epoch, request->data); - store.close(hdr->oid, &iocb); + sd_store->close(hdr->oid, &iocb); return ret; } @@ -651,7 +652,7 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi memset(&iocb, 0, sizeof(iocb)); iocb.epoch = epoch; iocb.flags = hdr->flags; - ret = store.open(hdr->oid, &iocb, 1); + ret = sd_store->open(hdr->oid, &iocb, 1); if (ret != SD_RES_SUCCESS) return ret; if (hdr->flags & SD_FLAG_CMD_COW) { @@ -666,14 +667,14 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi iocb.buf = buf; iocb.length = SD_DATA_OBJ_SIZE; iocb.offset = 0; - ret = store.write(hdr->oid, &iocb); + ret = sd_store->write(hdr->oid, &iocb); if (ret != SD_RES_SUCCESS) goto out; } ret = do_write_obj(&iocb, hdr, epoch, request->data); out: free(buf); - store.close(hdr->oid, &iocb); + sd_store->close(hdr->oid, &iocb); return ret; } @@ -1228,7 
+1229,7 @@ static int recover_object_from_replica(uint64_t oid, if (is_myself(entry->addr, entry->port)) { iocb.epoch = epoch; - ret = store.link(oid, &iocb, tgt_epoch); + ret = sd_store->link(oid, &iocb, tgt_epoch); if (ret == SD_RES_SUCCESS) { ret = 0; goto done; @@ -1277,7 +1278,7 @@ static int recover_object_from_replica(uint64_t oid, iocb.epoch = epoch; iocb.length = rlen; iocb.buf = buf; - ret = store.atomic_put(oid, &iocb); + ret = sd_store->atomic_put(oid, &iocb); if (ret!= SD_RES_SUCCESS) { ret = -1; goto out; @@ -1410,9 +1411,9 @@ static void recover_object(struct work *work) eprintf("done:%"PRIu32" count:%"PRIu32", oid:%"PRIx64"\n", rw->done, rw->count, oid); iocb.epoch = epoch; - ret = store.open(oid, &iocb, 0); + ret = sd_store->open(oid, &iocb, 0); if (ret == SD_RES_SUCCESS) { - store.close(oid, &iocb); + sd_store->close(oid, &iocb); dprintf("the object is already recovered\n"); return; } @@ -1494,10 +1495,10 @@ int is_recoverying_oid(uint64_t oid) memset(&iocb, 0, sizeof(iocb)); iocb.epoch = sys->epoch; - ret = store.open(oid, &iocb, 0); + ret = sd_store->open(oid, &iocb, 0); if (ret == SD_RES_SUCCESS) { dprintf("the object %" PRIx64 " is already recoverd\n", oid); - store.close(oid, &iocb); + sd_store->close(oid, &iocb); return 0; } @@ -1993,6 +1994,7 @@ static int init_config_path(const char *base_path) int init_store(const char *d) { int ret; + uint8_t driver_name[STORE_LEN]; ret = init_obj_path(d); if (ret) @@ -2014,10 +2016,19 @@ int init_store(const char *d) if (ret) return ret; - ret = store.init(obj_path); - if (ret) - return ret; + ret = get_cluster_store(driver_name); + if (ret != SD_RES_SUCCESS) + return 1; + + if (strlen((char *)driver_name)) + sd_store = find_store_driver((char *)driver_name); + if (sd_store) { + ret = sd_store->init(obj_path); + if (ret != SD_RES_SUCCESS) + return ret; + } else + dprintf("no store found\n"); return ret; } @@ -2134,3 +2145,53 @@ int get_cluster_flags(uint16_t *flags) out: return ret; } + +int 
set_cluster_store(const uint8_t *name) +{ + int fd, ret = SD_RES_EIO, len; + void *jd; + + fd = open(config_path, O_DSYNC | O_WRONLY); + if (fd < 0) + goto out; + + len = strlen((char *)name) + 1; + jd = jrnl_begin((void *)name, len, + offsetof(struct sheepdog_config, store), + config_path, jrnl_path); + if (!jd) { + ret = SD_RES_EIO; + goto err; + } + ret = xpwrite(fd, name, len, offsetof(struct sheepdog_config, store)); + if (ret != len) + ret = SD_RES_EIO; + else + ret = SD_RES_SUCCESS; + jrnl_end(jd); +err: + close(fd); +out: + return ret; +} + +int get_cluster_store(uint8_t *buf) +{ + int fd, ret = SD_RES_EIO; + + fd = open(config_path, O_RDONLY); + if (fd < 0) + goto out; + + ret = pread(fd, buf, STORE_LEN, + offsetof(struct sheepdog_config, store)); + + if (ret == -1) + ret = SD_RES_EIO; + else + ret = SD_RES_SUCCESS; + + close(fd); +out: + return ret; +} -- 1.7.8.2 |