[Sheepdog] [PATCH v6 02/17] store: add dynamic mechanism to chain the available backend stores.
Liu Yuan
namei.unix at gmail.com
Thu Jan 12 14:37:13 CET 2012
From: Liu Yuan <tailai.ly at taobao.com>
- change global store structure to a pointer
- use a list to maintain the stores.
- use /obj/.store to remember backend store persistently.
- now we can specify the backend store in the command
collie cluster format -b farm #use farm
If no store is specified, sheep will currently use the 'simple' store.
If the specified store is not available, collie will print a list of
the available stores.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
collie/cluster.c | 62 +++++++++++++++++++++++--
collie/collie.c | 1 +
include/sheep.h | 6 ++-
include/sheepdog_proto.h | 3 +
sheep/ops.c | 30 ++++++++++++
sheep/sheep_priv.h | 25 +++++++++-
sheep/simple_store.c | 10 +---
sheep/store.c | 115 +++++++++++++++++++++++++++++++++++-----------
8 files changed, 209 insertions(+), 43 deletions(-)
diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..6b9cd4b 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -20,14 +20,56 @@ struct cluster_cmd_data {
int copies;
int nohalt;
int force;
+ char name[STORE_LEN];
} cluster_cmd_data;
+#define DEFAULT_STORE "simple"
+
static void set_nohalt(uint16_t *p)
{
if (p)
*p |= SD_FLAG_NOHALT;
}
+/*
+ * Ask the connected sheep for the names of its registered backend stores
+ * (SD_OP_GET_STORE_LIST) and print them to stdout.
+ *
+ * Returns EXIT_SYSFAIL when connecting or executing the request fails and
+ * EXIT_FAILURE when sheep answers with an error result.  After printing the
+ * list it also returns EXIT_SYSFAIL.
+ * NOTE(review): the non-success code on the printing path looks deliberate,
+ * because cluster_format() only calls this after a failed format -- confirm.
+ */
+static int list_store(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+	char buf[512] = { 0 };
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	wlen = 0;
+	/*
+	 * Request one byte less than the buffer holds so that the final,
+	 * zero-initialised byte always terminates the string printed below.
+	 */
+	rlen = sizeof(buf) - 1;
+	hdr.opcode = SD_OP_GET_STORE_LIST;
+	hdr.data_length = rlen;
+
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Failed to get store list: %s\n",
+			sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	printf("Available stores:\n");
+	printf("---------------------------------------\n");
+	printf("%s\n", buf);
+	return EXIT_SYSFAIL;
+}
+
static int cluster_format(int argc, char **argv)
{
int fd, ret;
@@ -35,6 +77,7 @@ static int cluster_format(int argc, char **argv)
struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr;
unsigned rlen, wlen;
struct timeval tv;
+ char store_name[STORE_LEN];
fd = connect_to(sdhost, sdport);
if (fd < 0)
@@ -51,9 +94,15 @@ static int cluster_format(int argc, char **argv)
hdr.epoch = node_list_version;
hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
- rlen = 0;
- wlen = 0;
- ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen);
+ if (strlen(cluster_cmd_data.name))
+ strncpy(store_name, cluster_cmd_data.name, STORE_LEN);
+ else
+ strcpy(store_name, DEFAULT_STORE);
+ hdr.data_length = wlen = strlen(store_name) + 1;
+ hdr.flags = SD_FLAG_CMD_WRITE;
+
+ printf("using backend %s store\n", store_name);
+ ret = exec_req(fd, (struct sd_req *)&hdr, store_name, &wlen, &rlen);
close(fd);
if (ret) {
@@ -64,7 +113,7 @@ static int cluster_format(int argc, char **argv)
if (rsp->result != SD_RES_SUCCESS) {
fprintf(stderr, "Format failed: %s\n",
sd_strerror(rsp->result));
- return EXIT_FAILURE;
+ return list_store();
}
return EXIT_SUCCESS;
@@ -237,7 +286,7 @@ static int cluster_recover(int argc, char **argv)
static struct subcommand cluster_cmd[] = {
{"info", NULL, "aprh", "show cluster information",
0, cluster_info},
- {"format", NULL, "cHaph", "create a Sheepdog store",
+ {"format", NULL, "bcHaph", "create a Sheepdog store",
0, cluster_format},
{"shutdown", NULL, "aph", "stop Sheepdog",
SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -252,6 +301,9 @@ static int cluster_parser(int ch, char *opt)
char *p;
switch (ch) {
+ case 'b':
+ strncpy(cluster_cmd_data.name, opt, 10);
+ break;
case 'c':
copies = strtol(opt, &p, 10);
if (opt == p || copies < 1) {
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..baf7c67 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -40,6 +40,7 @@ static const struct sd_option collie_options[] = {
{'d', "delete", 0, "delete a key"},
/* cluster options */
+ {'b', "store", 1, "specify backend store"},
{'c', "copies", 1, "specify the data redundancy (number of copies)"},
{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
nodes for the configured redundancy"},
diff --git a/include/sheep.h b/include/sheep.h
index 906c1f5..bbca7f8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -27,7 +27,7 @@
#define SD_MAX_VNODES 65536
#define SD_MAX_VMS 4096 /* FIXME: should be removed */
-#define SD_OP_SHEEP 0x80
+#define SD_OP_SHEEP 0x80
#define SD_OP_DEL_VDI 0x81
#define SD_OP_GET_NODE_LIST 0x82
#define SD_OP_GET_VM_LIST 0x83
@@ -37,7 +37,8 @@
#define SD_OP_STAT_CLUSTER 0x87
#define SD_OP_KILL_NODE 0x88
#define SD_OP_GET_VDI_ATTR 0x89
-#define SD_OP_RECOVER 0x8A
+#define SD_OP_RECOVER 0x8a
+#define SD_OP_GET_STORE_LIST 0x90
#define SD_FLAG_CMD_IO_LOCAL 0x0010
#define SD_FLAG_CMD_RECOVERY 0x0020
@@ -263,6 +264,7 @@ static inline const char *sd_strerror(int err)
{SD_RES_JOIN_FAILED, "Node has failed to join cluster"},
{SD_RES_HALT, "IO has halted as there are too few living nodes"},
{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
+ {SD_RES_NO_STORE, "Targeted backend store is not found"},
{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..de326ee 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -59,6 +59,7 @@
#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */
#define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
#define SD_RES_MANUAL_RECOVER 0x1A /* Users should not manually recover this cluster */
+#define SD_RES_NO_STORE 0x20 /* No targeted backend store */
/*
* Object ID rules
@@ -91,6 +92,8 @@
#define SD_ATTR_OBJ_SIZE (sizeof(struct sheepdog_vdi_attr))
#define CURRENT_VDI_ID 0
+#define STORE_LEN 16
+
struct sd_req {
uint8_t proto_ver;
uint8_t opcode;
diff --git a/sheep/ops.c b/sheep/ops.c
index 13ecdf2..3f65477 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -12,6 +12,10 @@
#include <stdlib.h>
#include "sheep_priv.h"
+#include "strbuf.h"
+
+extern char *obj_path;
+extern struct store_driver *sd_store;
enum sd_op_type {
SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
@@ -132,6 +136,11 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
int i, latest_epoch, ret;
uint64_t ctime;
+ sd_store = find_store_driver(data);
+ if (!sd_store)
+ return SD_RES_NO_STORE;
+
+ sd_store->init(obj_path);
sys->nr_sobjs = hdr->copies;
sys->flags = hdr->flags;
if (!sys->nr_sobjs)
@@ -212,6 +221,21 @@ static int cluster_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp,
return ret;
}
+/*
+ * Local operation: write the names of all registered store drivers into
+ * 'data' as one string in which every name is followed by a single space.
+ *
+ * The copy is capped at req->data_length bytes.
+ * NOTE(review): this assumes 'data' was sized from req->data_length by the
+ * request path -- confirm against the caller.  No NUL terminator is written.
+ *
+ * Always returns SD_RES_SUCCESS.
+ */
+static int local_get_store_list(const struct sd_req *req, struct sd_rsp *rsp,
+				void *data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct store_driver *driver;
+	size_t len;
+
+	list_for_each_entry(driver, &store_drivers, list) {
+		strbuf_addf(&buf, "%s ", driver->name);
+	}
+
+	/* Never copy more than the requester announced room for. */
+	len = buf.len;
+	if (len > req->data_length)
+		len = req->data_length;
+	memcpy(data, buf.buf, len);
+
+	strbuf_release(&buf);
+	return SD_RES_SUCCESS;
+}
+
static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -434,6 +458,12 @@ static struct sd_op_template sd_ops[] = {
},
/* local operations */
+ [SD_OP_GET_STORE_LIST] = {
+ .type = SD_OP_TYPE_LOCAL,
+ .force = 1,
+ .process_work = local_get_store_list,
+ },
+
[SD_OP_READ_VDIS] = {
.type = SD_OP_TYPE_LOCAL,
.force = 1,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 5145f06..0a25c7d 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -166,7 +166,8 @@ struct siocb {
};
struct store_driver {
- const char *driver_name;
+ struct list_head list;
+ const char *name;
int (*init)(char *path);
int (*open)(uint64_t oid, struct siocb *, int create);
int (*write)(uint64_t oid, struct siocb *);
@@ -178,7 +179,22 @@ struct store_driver {
int (*atomic_put)(uint64_t oid, struct siocb *);
};
-extern void register_store_driver(struct store_driver *);
+extern struct list_head store_drivers;
+#define add_store_driver(driver) \
+static void __attribute__((constructor)) add_ ## driver(void) { \
+ list_add(&driver.list, &store_drivers); \
+}
+
+/*
+ * Look up a registered store driver by name.
+ *
+ * Walks store_drivers and returns the first entry whose name compares equal
+ * to 'name', or NULL when no entry matches.
+ */
+static inline struct store_driver *find_store_driver(const char *name)
+{
+	struct store_driver *found = NULL;
+	struct store_driver *cur;
+
+	list_for_each_entry(cur, &store_drivers, list) {
+		if (!strcmp(cur->name, name)) {
+			found = cur;
+			break;
+		}
+	}
+	return found;
+}
extern struct cluster_info *sys;
@@ -236,12 +252,17 @@ int set_cluster_copies(uint8_t copies);
int get_cluster_copies(uint8_t *copies);
int set_cluster_flags(uint16_t flags);
int get_cluster_flags(uint16_t *flags);
+int set_cluster_store(const uint8_t *name);
+int get_cluster_store(uint8_t *buf);
int store_create_and_write_obj(const struct sd_req *, struct sd_rsp *, void *);
int store_write_obj(const struct sd_req *, struct sd_rsp *, void *);
int store_read_obj(const struct sd_req *, struct sd_rsp *, void *);
int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
+int store_file_write(void *buffer, size_t len);
+void *store_file_read(void);
+
#define NR_GW_WORKER_THREAD 4
#define NR_IO_WORKER_THREAD 4
diff --git a/sheep/simple_store.c b/sheep/simple_store.c
index ddaa781..a5711c1 100644
--- a/sheep/simple_store.c
+++ b/sheep/simple_store.c
@@ -239,8 +239,8 @@ out:
return ret;
}
-struct store_driver store = {
- .driver_name = "simple",
+struct store_driver simple_store = {
+ .name = "simple",
.init = simple_store_init,
.open = simple_store_open,
.write = simple_store_write,
@@ -251,8 +251,4 @@ struct store_driver store = {
.atomic_put = simple_store_atomic_put,
};
-void register_store_driver(struct store_driver *driver)
-{
- store = *driver;
- eprintf("Register %s store driver\n", store.driver_name);
-}
+add_store_driver(simple_store);
diff --git a/sheep/store.c b/sheep/store.c
index 3323fbc..fa2849e 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -31,7 +31,7 @@ struct sheepdog_config {
uint64_t ctime;
uint16_t flags;
uint8_t copies;
- uint8_t pad[3];
+ uint8_t store[STORE_LEN];
};
char *obj_path;
@@ -43,7 +43,8 @@ static char *config_path;
static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP;
mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
-extern struct store_driver store;
+struct store_driver *sd_store;
+LIST_HEAD(store_drivers);
static int obj_cmp(const void *oid1, const void *oid2)
{
@@ -59,7 +60,7 @@ static int obj_cmp(const void *oid1, const void *oid2)
static void get_store_dir(struct strbuf *buf, int epoch)
{
- if (!strcmp(store.name, "simple"))
+ if (!strcmp(sd_store->name, "simple"))
strbuf_addf(buf, "%s%08u", obj_path, epoch);
else /* XXX assume other store doesn't need epoch/obj pattern */
strbuf_addf(buf, "%s", obj_path);
@@ -138,7 +139,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
iocb.buf = buf;
iocb.length = 0;
iocb.epoch = i;
- store.get_objlist(&iocb);
+ sd_store->get_objlist(&iocb);
nr = merge_objlist(list, nr, (uint64_t *)iocb.buf, iocb.length);
}
out:
@@ -173,17 +174,17 @@ static int read_copy_from_cluster(struct request *req, uint32_t epoch,
if (is_myself(e[n].addr, e[n].port)) {
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
- ret = store.open(oid, &iocb, 0);
+ ret = sd_store->open(oid, &iocb, 0);
if (ret != SD_RES_SUCCESS)
continue;
iocb.buf = buf;
iocb.length = SD_DATA_OBJ_SIZE;
iocb.offset = 0;
- ret = store.read(oid, &iocb);
+ ret = sd_store->read(oid, &iocb);
if (ret != SD_RES_SUCCESS)
continue;
- store.close(oid, &iocb);
+ sd_store->close(oid, &iocb);
goto out;
}
@@ -426,7 +427,7 @@ out:
int update_epoch_store(uint32_t epoch)
{
- if (!strcmp(store.name, "simple")) {
+ if (!strcmp(sd_store->name, "simple")) {
char new[1024];
snprintf(new, sizeof(new), "%s%08u/", obj_path, epoch);
@@ -565,21 +566,21 @@ int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
iocb.flags = hdr->flags;
- ret = store.open(hdr->oid, &iocb, 0);
+ ret = sd_store->open(hdr->oid, &iocb, 0);
if (ret != SD_RES_SUCCESS)
return ret;
iocb.buf = request->data;
iocb.length = hdr->data_length;
iocb.offset = hdr->offset;
- ret = store.read(hdr->oid, &iocb);
+ ret = sd_store->read(hdr->oid, &iocb);
if (ret != SD_RES_SUCCESS)
goto out;
rsps->data_length = hdr->data_length;
rsps->copies = sys->nr_sobjs;
out:
- store.close(hdr->oid, &iocb);
+ sd_store->close(hdr->oid, &iocb);
return ret;
}
@@ -604,11 +605,11 @@ static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epo
strbuf_release(&buf);
return SD_RES_EIO;
}
- ret = store.write(oid, iocb);
+ ret = sd_store->write(oid, iocb);
jrnl_end(jd);
strbuf_release(&buf);
} else
- ret = store.write(oid, iocb);
+ ret = sd_store->write(oid, iocb);
return ret;
}
@@ -624,13 +625,13 @@ int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
iocb.flags = hdr->flags;
- ret = store.open(hdr->oid, &iocb, 0);
+ ret = sd_store->open(hdr->oid, &iocb, 0);
if (ret != SD_RES_SUCCESS)
return ret;
ret = do_write_obj(&iocb, hdr, epoch, request->data);
- store.close(hdr->oid, &iocb);
+ sd_store->close(hdr->oid, &iocb);
return ret;
}
@@ -651,7 +652,7 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = epoch;
iocb.flags = hdr->flags;
- ret = store.open(hdr->oid, &iocb, 1);
+ ret = sd_store->open(hdr->oid, &iocb, 1);
if (ret != SD_RES_SUCCESS)
return ret;
if (hdr->flags & SD_FLAG_CMD_COW) {
@@ -666,14 +667,14 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
iocb.buf = buf;
iocb.length = SD_DATA_OBJ_SIZE;
iocb.offset = 0;
- ret = store.write(hdr->oid, &iocb);
+ ret = sd_store->write(hdr->oid, &iocb);
if (ret != SD_RES_SUCCESS)
goto out;
}
ret = do_write_obj(&iocb, hdr, epoch, request->data);
out:
free(buf);
- store.close(hdr->oid, &iocb);
+ sd_store->close(hdr->oid, &iocb);
return ret;
}
@@ -1228,7 +1229,7 @@ static int recover_object_from_replica(uint64_t oid,
if (is_myself(entry->addr, entry->port)) {
iocb.epoch = epoch;
- ret = store.link(oid, &iocb, tgt_epoch);
+ ret = sd_store->link(oid, &iocb, tgt_epoch);
if (ret == SD_RES_SUCCESS) {
ret = 0;
goto done;
@@ -1277,7 +1278,7 @@ static int recover_object_from_replica(uint64_t oid,
iocb.epoch = epoch;
iocb.length = rlen;
iocb.buf = buf;
- ret = store.atomic_put(oid, &iocb);
+ ret = sd_store->atomic_put(oid, &iocb);
if (ret!= SD_RES_SUCCESS) {
ret = -1;
goto out;
@@ -1410,9 +1411,9 @@ static void recover_object(struct work *work)
eprintf("done:%"PRIu32" count:%"PRIu32", oid:%"PRIx64"\n", rw->done, rw->count, oid);
iocb.epoch = epoch;
- ret = store.open(oid, &iocb, 0);
+ ret = sd_store->open(oid, &iocb, 0);
if (ret == SD_RES_SUCCESS) {
- store.close(oid, &iocb);
+ sd_store->close(oid, &iocb);
dprintf("the object is already recovered\n");
return;
}
@@ -1494,10 +1495,10 @@ int is_recoverying_oid(uint64_t oid)
memset(&iocb, 0, sizeof(iocb));
iocb.epoch = sys->epoch;
- ret = store.open(oid, &iocb, 0);
+ ret = sd_store->open(oid, &iocb, 0);
if (ret == SD_RES_SUCCESS) {
dprintf("the object %" PRIx64 " is already recoverd\n", oid);
- store.close(oid, &iocb);
+ sd_store->close(oid, &iocb);
return 0;
}
@@ -1993,6 +1994,7 @@ static int init_config_path(const char *base_path)
int init_store(const char *d)
{
int ret;
+ uint8_t driver_name[STORE_LEN];
ret = init_obj_path(d);
if (ret)
@@ -2014,10 +2016,19 @@ int init_store(const char *d)
if (ret)
return ret;
- ret = store.init(obj_path);
- if (ret)
- return ret;
+ ret = get_cluster_store(driver_name);
+ if (ret != SD_RES_SUCCESS)
+ return 1;
+
+ if (strlen((char *)driver_name))
+ sd_store = find_store_driver((char *)driver_name);
+ if (sd_store) {
+ ret = sd_store->init(obj_path);
+ if (ret != SD_RES_SUCCESS)
+ return ret;
+ } else
+ dprintf("no store found\n");
return ret;
}
@@ -2134,3 +2145,53 @@ int get_cluster_flags(uint16_t *flags)
out:
return ret;
}
+
+/*
+ * Persist the backend store name in the 'store' field of the config file.
+ *
+ * 'name' is a NUL-terminated string; it is written, terminator included,
+ * at offsetof(struct sheepdog_config, store) under journal protection.
+ *
+ * Returns SD_RES_SUCCESS on success and SD_RES_EIO when the name does not
+ * fit into the STORE_LEN-byte field, the config file cannot be opened, the
+ * journal cannot be started, or the write is short.
+ */
+int set_cluster_store(const uint8_t *name)
+{
+	int fd, ret = SD_RES_EIO, len;
+	void *jd;
+
+	/* Refuse names that would spill past the fixed-size store field. */
+	if (strlen((char *)name) >= STORE_LEN)
+		goto out;
+
+	fd = open(config_path, O_DSYNC | O_WRONLY);
+	if (fd < 0)
+		goto out;
+
+	len = strlen((char *)name) + 1;
+	jd = jrnl_begin((void *)name, len,
+			offsetof(struct sheepdog_config, store),
+			config_path, jrnl_path);
+	if (!jd) {
+		ret = SD_RES_EIO;
+		goto err;
+	}
+	ret = xpwrite(fd, name, len, offsetof(struct sheepdog_config, store));
+	if (ret != len)
+		ret = SD_RES_EIO;
+	else
+		ret = SD_RES_SUCCESS;
+	jrnl_end(jd);
+err:
+	close(fd);
+out:
+	return ret;
+}
+
+/*
+ * Read the persisted backend store name from the config file.
+ *
+ * 'buf' must have room for STORE_LEN bytes.  It is zeroed first, so a short
+ * read (for example a config file that ends before the store field) yields
+ * an empty string instead of uninitialised bytes, and the last byte is
+ * forced to NUL so the result is always a valid C string.
+ *
+ * Returns SD_RES_SUCCESS unless the config file cannot be opened or pread()
+ * fails, in which case SD_RES_EIO is returned.
+ */
+int get_cluster_store(uint8_t *buf)
+{
+	int fd, ret = SD_RES_EIO;
+
+	memset(buf, 0, STORE_LEN);
+
+	fd = open(config_path, O_RDONLY);
+	if (fd < 0)
+		goto out;
+
+	ret = pread(fd, buf, STORE_LEN,
+		    offsetof(struct sheepdog_config, store));
+
+	if (ret == -1)
+		ret = SD_RES_EIO;
+	else {
+		/* Callers run strlen() on buf; guarantee termination. */
+		buf[STORE_LEN - 1] = '\0';
+		ret = SD_RES_SUCCESS;
+	}
+
+	close(fd);
+out:
+	return ret;
+}
--
1.7.8.2
More information about the sheepdog
mailing list