[Sheepdog] [PATCH v5 02/17] store: add dynamic mechanism to chain the available backend stores.
Liu Yuan
namei.unix at gmail.com
Fri Dec 30 14:06:57 CET 2011
From: Liu Yuan <tailai.ly at taobao.com>
- change global store structure to a pointer
- use a list to maintain the stores.
- use /obj/.store to persistently record which backend store is in use.
- the backend store can now be specified on the command line, e.g.
collie cluster format -b farm #use farm
If no store is specified, sheep currently uses the 'simple' store.
If the specified store is not available, collie prints a list of
the available stores.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
collie/cluster.c | 59 +++++++++++++++++++++++-
collie/collie.c | 1 +
include/sheep.h | 9 ++-
include/sheepdog_proto.h | 1 +
sheep/ops.c | 30 ++++++++++++
sheep/sheep_priv.h | 34 +++++++++++++-
sheep/simple_store.c | 10 +---
sheep/store.c | 115 +++++++++++++++++++++++++++++++++++----------
8 files changed, 219 insertions(+), 40 deletions(-)
diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..a97ef79 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -20,6 +20,7 @@ struct cluster_cmd_data {
int copies;
int nohalt;
int force;
+ char argv[10];
} cluster_cmd_data;
static void set_nohalt(uint16_t *p)
@@ -28,6 +29,53 @@ static void set_nohalt(uint16_t *p)
*p |= SD_FLAG_NOHALT;
}
+static int get_store_index(char *name)
+{
+ int ret = -1;
+ if (!strlen(name) || strcmp(name, "simple") == 0)
+ ret = 0;
+ return ret;
+}
+
+static int list_store(void)
+{
+ int fd, ret;
+ struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+ unsigned rlen, wlen;
+ char buf[512] = { 0 };
+
+ fd = connect_to(sdhost, sdport);
+ if (fd < 0)
+ return EXIT_SYSFAIL;
+
+ memset(&hdr, 0, sizeof(hdr));
+
+ wlen = 0;
+ rlen = 512;
+ hdr.opcode = SD_OP_GET_STORE_LIST;
+ hdr.data_length = rlen;
+
+ ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+ close(fd);
+
+ if (ret) {
+ fprintf(stderr, "Failed to connect\n");
+ return EXIT_FAILURE;
+ }
+
+ if (rsp->result != SD_RES_SUCCESS) {
+ fprintf(stderr, "Restore failed: %s\n",
+ sd_strerror(rsp->result));
+ return EXIT_FAILURE;
+ }
+
+ printf("Available stores:\n");
+ printf("---------------------------------------\n");
+ printf("%s\n", buf);
+ return EXIT_SUCCESS;
+}
+
static int cluster_format(int argc, char **argv)
{
int fd, ret;
@@ -35,6 +83,9 @@ static int cluster_format(int argc, char **argv)
struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr;
unsigned rlen, wlen;
struct timeval tv;
+ uint8_t idx;
+
+ idx = get_store_index(cluster_cmd_data.argv);
fd = connect_to(sdhost, sdport);
if (fd < 0)
@@ -50,6 +101,7 @@ static int cluster_format(int argc, char **argv)
set_nohalt(&hdr.flags);
hdr.epoch = node_list_version;
hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
+ hdr.index = idx;
rlen = 0;
wlen = 0;
@@ -64,7 +116,7 @@ static int cluster_format(int argc, char **argv)
if (rsp->result != SD_RES_SUCCESS) {
fprintf(stderr, "Format failed: %s\n",
sd_strerror(rsp->result));
- return EXIT_FAILURE;
+ return list_store();
}
return EXIT_SUCCESS;
@@ -237,7 +289,7 @@ static int cluster_recover(int argc, char **argv)
static struct subcommand cluster_cmd[] = {
{"info", NULL, "aprh", "show cluster information",
0, cluster_info},
- {"format", NULL, "cHaph", "create a Sheepdog store",
+ {"format", NULL, "bcHaph", "create a Sheepdog store",
0, cluster_format},
{"shutdown", NULL, "aph", "stop Sheepdog",
SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -252,6 +304,9 @@ static int cluster_parser(int ch, char *opt)
char *p;
switch (ch) {
+ case 'b':
+ strcpy(cluster_cmd_data.argv, opt);
+ break;
case 'c':
copies = strtol(opt, &p, 10);
if (opt == p || copies < 1) {
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..baf7c67 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -40,6 +40,7 @@ static const struct sd_option collie_options[] = {
{'d', "delete", 0, "delete a key"},
/* cluster options */
+ {'b', "store", 1, "specify backend store"},
{'c', "copies", 1, "specify the data redundancy (number of copies)"},
{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
nodes for the configured redundancy"},
diff --git a/include/sheep.h b/include/sheep.h
index 906c1f5..6cd63e7 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -27,7 +27,7 @@
#define SD_MAX_VNODES 65536
#define SD_MAX_VMS 4096 /* FIXME: should be removed */
-#define SD_OP_SHEEP 0x80
+#define SD_OP_SHEEP 0x80
#define SD_OP_DEL_VDI 0x81
#define SD_OP_GET_NODE_LIST 0x82
#define SD_OP_GET_VM_LIST 0x83
@@ -37,7 +37,8 @@
#define SD_OP_STAT_CLUSTER 0x87
#define SD_OP_KILL_NODE 0x88
#define SD_OP_GET_VDI_ATTR 0x89
-#define SD_OP_RECOVER 0x8A
+#define SD_OP_RECOVER 0x8a
+#define SD_OP_GET_STORE_LIST 0x90
#define SD_FLAG_CMD_IO_LOCAL 0x0010
#define SD_FLAG_CMD_RECOVERY 0x0020
@@ -72,7 +73,8 @@ struct sd_so_req {
uint64_t ctime;
uint32_t copies;
uint32_t tag;
- uint32_t opcode_specific[2];
+ uint8_t index;
+ uint8_t opcode_specific[7];
};
struct sd_so_rsp {
@@ -263,6 +265,7 @@ static inline const char *sd_strerror(int err)
{SD_RES_JOIN_FAILED, "Node has failed to join cluster"},
{SD_RES_HALT, "IO has halted as there are too few living nodes"},
{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
+ {SD_RES_NO_STORE, "Targeted backend store is not found"},
{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..289502f 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -59,6 +59,7 @@
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
#define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
#define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover this cluster */
+#define SD_RES_NO_STORE 0x20 /* No targeted backend store */
/*
* Object ID rules
diff --git a/sheep/ops.c b/sheep/ops.c
index 13ecdf2..478d41a 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -12,6 +12,10 @@
#include <stdlib.h>
#include "sheep_priv.h"
+#include "strbuf.h"
+
+extern char *obj_path;
+extern struct store_driver *sd_store;
enum sd_op_type {
SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
@@ -132,6 +136,11 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
int i, latest_epoch, ret;
uint64_t ctime;
+ sd_store = find_store_driver(index_to_name(hdr->index));
+ if (!sd_store)
+ return SD_RES_NO_STORE;
+
+ sd_store->init(obj_path);
sys->nr_sobjs = hdr->copies;
sys->flags = hdr->flags;
if (!sys->nr_sobjs)
@@ -212,6 +221,21 @@ static int cluster_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp,
return ret;
}
+static int local_get_store_list(const struct sd_req *req, struct sd_rsp *rsp,
+ void *data)
+{
+ struct strbuf buf = STRBUF_INIT;
+ struct store_driver *driver;
+
+ list_for_each_entry(driver, &store_drivers, list) {
+ strbuf_addf(&buf, "%s ", driver->name);
+ }
+ memcpy(data, buf.buf, buf.len);
+
+ strbuf_release(&buf);
+ return SD_RES_SUCCESS;
+}
+
static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -434,6 +458,12 @@ static struct sd_op_template sd_ops[] = {
},
/* local operations */
+ [SD_OP_GET_STORE_LIST] = {
+ .type = SD_OP_TYPE_LOCAL,
+ .force = 1,
+ .process_work = local_get_store_list,
+ },
+
[SD_OP_READ_VDIS] = {
.type = SD_OP_TYPE_LOCAL,
.force = 1,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 5145f06..bee2cac 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -166,7 +166,8 @@ struct siocb {
};
struct store_driver {
- const char *driver_name;
+ struct list_head list;
+ const char *name;
int (*init)(char *path);
int (*open)(uint64_t oid, struct siocb *, int create);
int (*write)(uint64_t oid, struct siocb *);
@@ -178,7 +179,33 @@ struct store_driver {
int (*atomic_put)(uint64_t oid, struct siocb *);
};
-extern void register_store_driver(struct store_driver *);
+extern struct list_head store_drivers;
+#define add_store_driver(driver) \
+static void __attribute__((constructor)) add_ ## driver(void) { \
+ list_add(&driver.list, &store_drivers); \
+}
+
+static inline struct store_driver *find_store_driver(const char *name)
+{
+ struct store_driver *driver;
+
+ list_for_each_entry(driver, &store_drivers, list) {
+ if (strcmp(driver->name, name) == 0)
+ return driver;
+ }
+ return NULL;
+}
+
+static inline const char *index_to_name(int idx)
+{
+ static const char *stores[] = {
+ [0] = "simple",
+ };
+
+ if (idx < 0 || idx >= ARRAY_SIZE(stores))
+ return "INVALID";
+ return stores[idx];
+}
extern struct cluster_info *sys;
@@ -242,6 +269,9 @@ int store_write_obj(const struct sd_req *, struct sd_rsp *, void *);
int store_read_obj(const struct sd_req *, struct sd_rsp *, void *);
int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
+int store_file_write(void *buffer, size_t len);
+void *store_file_read(void);
+
#define NR_GW_WORKER_THREAD 4
#define NR_IO_WORKER_THREAD 4
diff --git a/sheep/simple_store.c b/sheep/simple_store.c
index ddaa781..a5711c1 100644
--- a/sheep/simple_store.c
+++ b/sheep/simple_store.c
@@ -239,8 +239,8 @@ out:
return ret;
}
-struct store_driver store = {
- .driver_name = "simple",
+struct store_driver simple_store = {
+ .name = "simple",
.init = simple_store_init,
.open = simple_store_open,
.write = simple_store_write,
@@ -251,8 +251,4 @@ struct store_driver store = {
.atomic_put = simple_store_atomic_put,
};
-void register_store_driver(struct store_driver *driver)
-{
- store = *driver;
- eprintf("Register %s store driver\n", store.driver_name);
-}
+add_store_driver(simple_store);
diff --git a/sheep/store.c b/sheep/store.c
index 3323fbc..0025252 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -43,7 +43,8 @@ static char *config_path;
static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP;
mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
-extern struct store_driver store;
+struct store_driver *sd_store;
+LIST_HEAD(store_drivers);
static int obj_cmp(const void *oid1, const void *oid2)
{
@@ -59,7 +60,7 @@ static int obj_cmp(const void *oid1, const void *oid2)
static void get_store_dir(struct strbuf *buf, int epoch)
{
- if (!strcmp(store.name, "simple"))
+ if (!strcmp(sd_store->name, "simple"))
strbuf_addf(buf, "%s%08u", obj_path, epoch);
else /* XXX assume other store doesn't need epoch/obj pattern */
strbuf_addf(buf, "%s", obj_path);
@@ -138,7 +139,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
iocb.buf = buf;
iocb.length = 0;
iocb.epoch = i;
- store.get_objlist(&iocb);
+ sd_store->get_objlist(&iocb);
nr = merge_objlist(list, nr, (uint64_t *)iocb.buf, iocb.length);
}
out:
@@ -173,17 +174,17 @@ static int read_copy_from_cluster(struct request *req, uint32_t epoch,
if (is_myself(e[n].addr, e[n].port)) {
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
- ret = store.open(oid, &iocb, 0);
+ ret = sd_store->open(oid, &iocb, 0);
if (ret != SD_RES_SUCCESS)
continue;
iocb.buf = buf;
iocb.length = SD_DATA_OBJ_SIZE;
iocb.offset = 0;
- ret = store.read(oid, &iocb);
+ ret = sd_store->read(oid, &iocb);
if (ret != SD_RES_SUCCESS)
continue;
- store.close(oid, &iocb);
+ sd_store->close(oid, &iocb);
goto out;
}
@@ -426,7 +427,7 @@ out:
int update_epoch_store(uint32_t epoch)
{
- if (!strcmp(store.name, "simple")) {
+ if (!strcmp(sd_store->name, "simple")) {
char new[1024];
snprintf(new, sizeof(new), "%s%08u/", obj_path, epoch);
@@ -565,21 +566,21 @@ int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
iocb.flags = hdr->flags;
- ret = store.open(hdr->oid, &iocb, 0);
+ ret = sd_store->open(hdr->oid, &iocb, 0);
if (ret != SD_RES_SUCCESS)
return ret;
iocb.buf = request->data;
iocb.length = hdr->data_length;
iocb.offset = hdr->offset;
- ret = store.read(hdr->oid, &iocb);
+ ret = sd_store->read(hdr->oid, &iocb);
if (ret != SD_RES_SUCCESS)
goto out;
rsps->data_length = hdr->data_length;
rsps->copies = sys->nr_sobjs;
out:
- store.close(hdr->oid, &iocb);
+ sd_store->close(hdr->oid, &iocb);
return ret;
}
@@ -604,11 +605,11 @@ static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epo
strbuf_release(&buf);
return SD_RES_EIO;
}
- ret = store.write(oid, iocb);
+ ret = sd_store->write(oid, iocb);
jrnl_end(jd);
strbuf_release(&buf);
} else
- ret = store.write(oid, iocb);
+ ret = sd_store->write(oid, iocb);
return ret;
}
@@ -624,13 +625,13 @@ int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
iocb.flags = hdr->flags;
- ret = store.open(hdr->oid, &iocb, 0);
+ ret = sd_store->open(hdr->oid, &iocb, 0);
if (ret != SD_RES_SUCCESS)
return ret;
ret = do_write_obj(&iocb, hdr, epoch, request->data);
- store.close(hdr->oid, &iocb);
+ sd_store->close(hdr->oid, &iocb);
return ret;
}
@@ -651,7 +652,7 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
iocb.flags = hdr->flags;
- ret = store.open(hdr->oid, &iocb, 1);
+ ret = sd_store->open(hdr->oid, &iocb, 1);
if (ret != SD_RES_SUCCESS)
return ret;
if (hdr->flags & SD_FLAG_CMD_COW) {
@@ -666,14 +667,14 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
iocb.buf = buf;
iocb.length = SD_DATA_OBJ_SIZE;
iocb.offset = 0;
- ret = store.write(hdr->oid, &iocb);
+ ret = sd_store->write(hdr->oid, &iocb);
if (ret != SD_RES_SUCCESS)
goto out;
}
ret = do_write_obj(&iocb, hdr, epoch, request->data);
out:
free(buf);
- store.close(hdr->oid, &iocb);
+ sd_store->close(hdr->oid, &iocb);
return ret;
}
@@ -1228,7 +1229,7 @@ static int recover_object_from_replica(uint64_t oid,
if (is_myself(entry->addr, entry->port)) {
iocb.epoch = epoch;
- ret = store.link(oid, &iocb, tgt_epoch);
+ ret = sd_store->link(oid, &iocb, tgt_epoch);
if (ret == SD_RES_SUCCESS) {
ret = 0;
goto done;
@@ -1277,7 +1278,7 @@ static int recover_object_from_replica(uint64_t oid,
iocb.epoch = epoch;
iocb.length = rlen;
iocb.buf = buf;
- ret = store.atomic_put(oid, &iocb);
+ ret = sd_store->atomic_put(oid, &iocb);
if (ret!= SD_RES_SUCCESS) {
ret = -1;
goto out;
@@ -1410,9 +1411,9 @@ static void recover_object(struct work *work)
eprintf("done:%"PRIu32" count:%"PRIu32", oid:%"PRIx64"\n", rw->done, rw->count, oid);
iocb.epoch = epoch;
- ret = store.open(oid, &iocb, 0);
+ ret = sd_store->open(oid, &iocb, 0);
if (ret == SD_RES_SUCCESS) {
- store.close(oid, &iocb);
+ sd_store->close(oid, &iocb);
dprintf("the object is already recovered\n");
return;
}
@@ -1494,10 +1495,10 @@ int is_recoverying_oid(uint64_t oid)
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = sys->epoch;
- ret = store.open(oid, &iocb, 0);
+ ret = sd_store->open(oid, &iocb, 0);
if (ret == SD_RES_SUCCESS) {
dprintf("the object %" PRIx64 " is already recoverd\n", oid);
- store.close(oid, &iocb);
+ sd_store->close(oid, &iocb);
return 0;
}
@@ -1993,6 +1994,7 @@ static int init_config_path(const char *base_path)
int init_store(const char *d)
{
int ret;
+ char *driver_name;
ret = init_obj_path(d);
if (ret)
@@ -2014,10 +2016,15 @@ int init_store(const char *d)
if (ret)
return ret;
- ret = store.init(obj_path);
- if (ret)
- return ret;
+ driver_name = (char *)store_file_read();
+ if (driver_name)
+ sd_store = find_store_driver(driver_name);
+ if (sd_store) {
+ ret = sd_store->init(obj_path);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+ }
return ret;
}
@@ -2134,3 +2141,59 @@ int get_cluster_flags(uint16_t *flags)
out:
return ret;
}
+
+void *store_file_read(void)
+{
+ struct strbuf buf = STRBUF_INIT;
+ struct stat st;
+ void *buffer = NULL;
+ int len, fd;
+
+ strbuf_addf(&buf, "%s%s", obj_path, ".store");
+
+ fd = open(buf.buf, O_RDONLY);
+ if (fd < 0) {
+ dprintf("not found .store file\n");
+ goto out;
+ }
+ if (fstat(fd, &st) < 0) {
+ dprintf("%m\n");
+ goto out_close;
+ }
+
+ len = st.st_size;
+ buffer = xmalloc(len);
+ len = xread(fd, buffer, len);
+ if (len != st.st_size) {
+ free(buffer);
+ buffer = NULL;
+ goto out_close;
+ }
+out_close:
+ close(fd);
+out:
+ strbuf_release(&buf);
+ return buffer;
+}
+
+int store_file_write(void *buffer, size_t len)
+{
+ int fd, ret = -1;
+ struct strbuf buf = STRBUF_INIT;
+
+ strbuf_addf(&buf, "%s%s", obj_path, ".store");
+ fd = open(buf.buf, O_WRONLY | O_TRUNC | O_CREAT, def_fmode);
+ if (fd < 0) {
+ dprintf("%m\n");
+ goto out;
+ }
+
+ ret = xwrite(fd, buffer, len);
+ if (ret != len)
+ ret = -1;
+
+ close(fd);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
--
1.7.8.rc3
More information about the sheepdog
mailing list