[Sheepdog] [PATCH v6 02/17] store: add dynamic mechanism to chain the available backend stores.

Liu Yuan namei.unix at gmail.com
Thu Jan 12 14:37:13 CET 2012


From: Liu Yuan <tailai.ly at taobao.com>

 - change global store structure to a pointer
 - use a list to maintain the stores.
 - use /obj/.store to remember backend store persistently.
 - the backend store can now be specified on the command line, e.g.
	collie cluster format -b farm	# use the 'farm' store
   If no store is specified, the 'simple' store is currently used.
   If the specified store is not available, collie prints a list of
   the available stores.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/cluster.c         |   62 +++++++++++++++++++++++--
 collie/collie.c          |    1 +
 include/sheep.h          |    6 ++-
 include/sheepdog_proto.h |    3 +
 sheep/ops.c              |   30 ++++++++++++
 sheep/sheep_priv.h       |   25 +++++++++-
 sheep/simple_store.c     |   10 +---
 sheep/store.c            |  115 +++++++++++++++++++++++++++++++++++-----------
 8 files changed, 209 insertions(+), 43 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..6b9cd4b 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -20,14 +20,56 @@ struct cluster_cmd_data {
 	int copies;
 	int nohalt;
 	int force;
+	char name[STORE_LEN];
 } cluster_cmd_data;
 
+#define DEFAULT_STORE	"simple"
+
 static void set_nohalt(uint16_t *p)
 {
 	if (p)
 		*p |= SD_FLAG_NOHALT;
 }
 
+/*
+ * Print the backend stores offered by the sheep at sdhost:sdport.  The visible
+ * caller runs this after a failed format, so it never returns EXIT_SUCCESS.
+ */
+static int list_store(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+	char buf[512] = { 0 };
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+	wlen = 0;
+	rlen = sizeof(buf) - 1;	/* leave room for the terminating NUL */
+	hdr.opcode = SD_OP_GET_STORE_LIST;
+	hdr.data_length = rlen;
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Failed to get store list: %s\n",
+				sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	printf("Available stores:\n");
+	printf("---------------------------------------\n");
+	printf("%s\n", buf);
+	return EXIT_SYSFAIL;
+}
+
 static int cluster_format(int argc, char **argv)
 {
 	int fd, ret;
@@ -35,6 +77,7 @@ static int cluster_format(int argc, char **argv)
 	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr;
 	unsigned rlen, wlen;
 	struct timeval tv;
+	char store_name[STORE_LEN];
 
 	fd = connect_to(sdhost, sdport);
 	if (fd < 0)
@@ -51,9 +94,15 @@ static int cluster_format(int argc, char **argv)
 	hdr.epoch = node_list_version;
 	hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
 
-	rlen = 0;
-	wlen = 0;
-	ret = exec_req(fd, (struct sd_req *)&hdr, NULL, &wlen, &rlen);
+	if (strlen(cluster_cmd_data.name))
+		strncpy(store_name, cluster_cmd_data.name, STORE_LEN);
+	else
+		strcpy(store_name, DEFAULT_STORE);
+	hdr.data_length = wlen = strlen(store_name) + 1;
+	hdr.flags = SD_FLAG_CMD_WRITE;
+
+	printf("using backend %s store\n", store_name);
+	ret = exec_req(fd, (struct sd_req *)&hdr, store_name, &wlen, &rlen);
 	close(fd);
 
 	if (ret) {
@@ -64,7 +113,7 @@ static int cluster_format(int argc, char **argv)
 	if (rsp->result != SD_RES_SUCCESS) {
 		fprintf(stderr, "Format failed: %s\n",
 				sd_strerror(rsp->result));
-		return EXIT_FAILURE;
+		return list_store();
 	}
 
 	return EXIT_SUCCESS;
@@ -237,7 +286,7 @@ static int cluster_recover(int argc, char **argv)
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprh", "show cluster information",
 	 0, cluster_info},
-	{"format", NULL, "cHaph", "create a Sheepdog store",
+	{"format", NULL, "bcHaph", "create a Sheepdog store",
 	 0, cluster_format},
 	{"shutdown", NULL, "aph", "stop Sheepdog",
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -252,6 +301,9 @@ static int cluster_parser(int ch, char *opt)
 	char *p;
 
 	switch (ch) {
+	case 'b':
+		strncpy(cluster_cmd_data.name, opt, 10);
+		break;
 	case 'c':
 		copies = strtol(opt, &p, 10);
 		if (opt == p || copies < 1) {
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..baf7c67 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -40,6 +40,7 @@ static const struct sd_option collie_options[] = {
 	{'d', "delete", 0, "delete a key"},
 
 	/* cluster options */
+	{'b', "store", 1, "specify backend store"},
 	{'c', "copies", 1, "specify the data redundancy (number of copies)"},
 	{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
                           nodes for the configured redundancy"},
diff --git a/include/sheep.h b/include/sheep.h
index 906c1f5..bbca7f8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -27,7 +27,7 @@
 #define SD_MAX_VNODES 65536
 #define SD_MAX_VMS   4096 /* FIXME: should be removed */
 
-#define SD_OP_SHEEP         0x80
+#define SD_OP_SHEEP          0x80
 #define SD_OP_DEL_VDI        0x81
 #define SD_OP_GET_NODE_LIST  0x82
 #define SD_OP_GET_VM_LIST    0x83
@@ -37,7 +37,8 @@
 #define SD_OP_STAT_CLUSTER   0x87
 #define SD_OP_KILL_NODE      0x88
 #define SD_OP_GET_VDI_ATTR   0x89
-#define SD_OP_RECOVER	     0x8A
+#define SD_OP_RECOVER        0x8a
+#define SD_OP_GET_STORE_LIST 0x90
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
@@ -263,6 +264,7 @@ static inline const char *sd_strerror(int err)
 		{SD_RES_JOIN_FAILED, "Node has failed to join cluster"},
 		{SD_RES_HALT, "IO has halted as there are too few living nodes"},
 		{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
+		{SD_RES_NO_STORE, "Targeted backend store is not found"},
 
 		{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
 		{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..de326ee 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -59,6 +59,7 @@
 #define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
 #define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
 #define SD_RES_MANUAL_RECOVER   0x1A /* Users should not manually recover this cluster */
+#define SD_RES_NO_STORE         0x20 /* No targeted backend store */
 
 /*
  * Object ID rules
@@ -91,6 +92,8 @@
 #define SD_ATTR_OBJ_SIZE (sizeof(struct sheepdog_vdi_attr))
 #define CURRENT_VDI_ID 0
 
+#define STORE_LEN 16
+
 struct sd_req {
 	uint8_t		proto_ver;
 	uint8_t		opcode;
diff --git a/sheep/ops.c b/sheep/ops.c
index 13ecdf2..3f65477 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -12,6 +12,10 @@
 #include <stdlib.h>
 
 #include "sheep_priv.h"
+#include "strbuf.h"
+
+extern char *obj_path;
+extern struct store_driver *sd_store;
 
 enum sd_op_type {
 	SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
@@ -132,6 +136,11 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 	int i, latest_epoch, ret;
 	uint64_t ctime;
 
+	sd_store = find_store_driver(data);
+	if (!sd_store)
+		return SD_RES_NO_STORE;
+
+	sd_store->init(obj_path);
 	sys->nr_sobjs = hdr->copies;
 	sys->flags = hdr->flags;
 	if (!sys->nr_sobjs)
@@ -212,6 +221,21 @@ static int cluster_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
+/* Fill @data with the registered store names, capped at req->data_length. */
+static int local_get_store_list(const struct sd_req *req, struct sd_rsp *rsp,
+				void *data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct store_driver *driver;
+
+	list_for_each_entry(driver, &store_drivers, list)
+		strbuf_addf(&buf, "%s ", driver->name);
+	memcpy(data, buf.buf,
+	       buf.len < req->data_length ? buf.len : req->data_length);
+	strbuf_release(&buf);
+	return SD_RES_SUCCESS;
+}
+
 static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
@@ -434,6 +458,12 @@ static struct sd_op_template sd_ops[] = {
 	},
 
 	/* local operations */
+	[SD_OP_GET_STORE_LIST] = {
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_work = local_get_store_list,
+	},
+
 	[SD_OP_READ_VDIS] = {
 		.type = SD_OP_TYPE_LOCAL,
 		.force = 1,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 5145f06..0a25c7d 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -166,7 +166,8 @@ struct siocb {
 };
 
 struct store_driver {
-	const char *driver_name;
+	struct list_head list;
+	const char *name;
 	int (*init)(char *path);
 	int (*open)(uint64_t oid, struct siocb *, int create);
 	int (*write)(uint64_t oid, struct siocb *);
@@ -178,7 +179,22 @@ struct store_driver {
 	int (*atomic_put)(uint64_t oid, struct siocb *);
 };
 
-extern void register_store_driver(struct store_driver *);
+extern struct list_head store_drivers;
+#define add_store_driver(driver) /* constructor: list_add() to store_drivers */ \
+static void __attribute__((constructor)) add_ ## driver(void) {  \
+        list_add(&driver.list, &store_drivers);                  \
+}
+
+/* Look up a registered store by name; NULL if @name is NULL or unknown. */
+static inline struct store_driver *find_store_driver(const char *name)
+{
+	struct store_driver *driver;
+
+	list_for_each_entry(driver, &store_drivers, list)
+		if (name && strcmp(driver->name, name) == 0)
+			return driver;
+	return NULL;
+}
 
 extern struct cluster_info *sys;
 
@@ -236,12 +252,17 @@ int set_cluster_copies(uint8_t copies);
 int get_cluster_copies(uint8_t *copies);
 int set_cluster_flags(uint16_t flags);
 int get_cluster_flags(uint16_t *flags);
+int set_cluster_store(const uint8_t *name);
+int get_cluster_store(uint8_t *buf);
 
 int store_create_and_write_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_write_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_read_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
 
+int store_file_write(void *buffer, size_t len);
+void *store_file_read(void);
+
 #define NR_GW_WORKER_THREAD 4
 #define NR_IO_WORKER_THREAD 4
 
diff --git a/sheep/simple_store.c b/sheep/simple_store.c
index ddaa781..a5711c1 100644
--- a/sheep/simple_store.c
+++ b/sheep/simple_store.c
@@ -239,8 +239,8 @@ out:
 	return ret;
 }
 
-struct store_driver store = {
-	.driver_name = "simple",
+struct store_driver simple_store = {
+	.name = "simple",
 	.init = simple_store_init,
 	.open = simple_store_open,
 	.write = simple_store_write,
@@ -251,8 +251,4 @@ struct store_driver store = {
 	.atomic_put = simple_store_atomic_put,
 };
 
-void register_store_driver(struct store_driver *driver)
-{
-	store = *driver;
-	eprintf("Register %s store driver\n", store.driver_name);
-}
+add_store_driver(simple_store);
diff --git a/sheep/store.c b/sheep/store.c
index 3323fbc..fa2849e 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -31,7 +31,7 @@ struct sheepdog_config {
 	uint64_t ctime;
 	uint16_t flags;
 	uint8_t copies;
-	uint8_t pad[3];
+	uint8_t store[STORE_LEN];
 };
 
 char *obj_path;
@@ -43,7 +43,8 @@ static char *config_path;
 static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP;
 mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
 
-extern struct store_driver store;
+struct store_driver *sd_store;
+LIST_HEAD(store_drivers);
 
 static int obj_cmp(const void *oid1, const void *oid2)
 {
@@ -59,7 +60,7 @@ static int obj_cmp(const void *oid1, const void *oid2)
 
 static void get_store_dir(struct strbuf *buf, int epoch)
 {
-	if (!strcmp(store.name, "simple"))
+	if (!strcmp(sd_store->name, "simple"))
 		strbuf_addf(buf, "%s%08u", obj_path, epoch);
 	else /* XXX assume other store doesn't need epoch/obj pattern */
 		strbuf_addf(buf, "%s", obj_path);
@@ -138,7 +139,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
 		iocb.buf = buf;
 		iocb.length = 0;
 		iocb.epoch = i;
-		store.get_objlist(&iocb);
+		sd_store->get_objlist(&iocb);
 		nr = merge_objlist(list, nr, (uint64_t *)iocb.buf, iocb.length);
 	}
 out:
@@ -173,17 +174,17 @@ static int read_copy_from_cluster(struct request *req, uint32_t epoch,
 		if (is_myself(e[n].addr, e[n].port)) {
 			memset(&iocb, 0, sizeof(iocb));
 			iocb.epoch = epoch;
-			ret = store.open(oid, &iocb, 0);
+			ret = sd_store->open(oid, &iocb, 0);
 			if (ret != SD_RES_SUCCESS)
 				continue;
 
 			iocb.buf = buf;
 			iocb.length = SD_DATA_OBJ_SIZE;
 			iocb.offset = 0;
-			ret = store.read(oid, &iocb);
+			ret = sd_store->read(oid, &iocb);
 			if (ret != SD_RES_SUCCESS)
 				continue;
-			store.close(oid, &iocb);
+			sd_store->close(oid, &iocb);
 			goto out;
 		}
 
@@ -426,7 +427,7 @@ out:
 
 int update_epoch_store(uint32_t epoch)
 {
-	if (!strcmp(store.name, "simple")) {
+	if (!strcmp(sd_store->name, "simple")) {
 		char new[1024];
 
 		snprintf(new, sizeof(new), "%s%08u/", obj_path, epoch);
@@ -565,21 +566,21 @@ int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.flags = hdr->flags;
-	ret = store.open(hdr->oid, &iocb, 0);
+	ret = sd_store->open(hdr->oid, &iocb, 0);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
 	iocb.buf = request->data;
 	iocb.length = hdr->data_length;
 	iocb.offset = hdr->offset;
-	ret = store.read(hdr->oid, &iocb);
+	ret = sd_store->read(hdr->oid, &iocb);
 	if (ret != SD_RES_SUCCESS)
 		goto out;
 
 	rsps->data_length = hdr->data_length;
 	rsps->copies = sys->nr_sobjs;
 out:
-	store.close(hdr->oid, &iocb);
+	sd_store->close(hdr->oid, &iocb);
 	return ret;
 }
 
@@ -604,11 +605,11 @@ static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epo
 			strbuf_release(&buf);
 			return SD_RES_EIO;
 		}
-		ret = store.write(oid, iocb);
+		ret = sd_store->write(oid, iocb);
 		jrnl_end(jd);
 		strbuf_release(&buf);
 	} else
-		ret = store.write(oid, iocb);
+		ret = sd_store->write(oid, iocb);
 
 	return ret;
 }
@@ -624,13 +625,13 @@ int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.flags = hdr->flags;
-	ret = store.open(hdr->oid, &iocb, 0);
+	ret = sd_store->open(hdr->oid, &iocb, 0);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
 	ret = do_write_obj(&iocb, hdr, epoch, request->data);
 
-	store.close(hdr->oid, &iocb);
+	sd_store->close(hdr->oid, &iocb);
 	return ret;
 }
 
@@ -651,7 +652,7 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.flags = hdr->flags;
-	ret = store.open(hdr->oid, &iocb, 1);
+	ret = sd_store->open(hdr->oid, &iocb, 1);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 	if (hdr->flags & SD_FLAG_CMD_COW) {
@@ -666,14 +667,14 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
 		iocb.buf = buf;
 		iocb.length = SD_DATA_OBJ_SIZE;
 		iocb.offset = 0;
-		ret = store.write(hdr->oid, &iocb);
+		ret = sd_store->write(hdr->oid, &iocb);
 		if (ret != SD_RES_SUCCESS)
 			goto out;
 	}
 	ret = do_write_obj(&iocb, hdr, epoch, request->data);
 out:
 	free(buf);
-	store.close(hdr->oid, &iocb);
+	sd_store->close(hdr->oid, &iocb);
 	return ret;
 }
 
@@ -1228,7 +1229,7 @@ static int recover_object_from_replica(uint64_t oid,
 
 	if (is_myself(entry->addr, entry->port)) {
 		iocb.epoch = epoch;
-		ret = store.link(oid, &iocb, tgt_epoch);
+		ret = sd_store->link(oid, &iocb, tgt_epoch);
 		if (ret == SD_RES_SUCCESS) {
 			ret = 0;
 			goto done;
@@ -1277,7 +1278,7 @@ static int recover_object_from_replica(uint64_t oid,
 		iocb.epoch = epoch;
 		iocb.length = rlen;
 		iocb.buf = buf;
-		ret = store.atomic_put(oid, &iocb);
+		ret = sd_store->atomic_put(oid, &iocb);
 		if (ret!= SD_RES_SUCCESS) {
 			ret = -1;
 			goto out;
@@ -1410,9 +1411,9 @@ static void recover_object(struct work *work)
 	eprintf("done:%"PRIu32" count:%"PRIu32", oid:%"PRIx64"\n", rw->done, rw->count, oid);
 
 	iocb.epoch = epoch;
-	ret = store.open(oid, &iocb, 0);
+	ret = sd_store->open(oid, &iocb, 0);
 	if (ret == SD_RES_SUCCESS) {
-		store.close(oid, &iocb);
+		sd_store->close(oid, &iocb);
 		dprintf("the object is already recovered\n");
 		return;
 	}
@@ -1494,10 +1495,10 @@ int is_recoverying_oid(uint64_t oid)
 
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = sys->epoch;
-	ret = store.open(oid, &iocb, 0);
+	ret = sd_store->open(oid, &iocb, 0);
 	if (ret == SD_RES_SUCCESS) {
 		dprintf("the object %" PRIx64 " is already recoverd\n", oid);
-		store.close(oid, &iocb);
+		sd_store->close(oid, &iocb);
 		return 0;
 	}
 
@@ -1993,6 +1994,7 @@ static int init_config_path(const char *base_path)
 int init_store(const char *d)
 {
 	int ret;
+	uint8_t driver_name[STORE_LEN];
 
 	ret = init_obj_path(d);
 	if (ret)
@@ -2014,10 +2016,19 @@ int init_store(const char *d)
 	if (ret)
 		return ret;
 
-	ret = store.init(obj_path);
-	if (ret)
-		return ret;
+	ret = get_cluster_store(driver_name);
+	if (ret != SD_RES_SUCCESS)
+		return 1;
+
+	if (strlen((char *)driver_name))
+		sd_store = find_store_driver((char *)driver_name);
 
+	if (sd_store) {
+		ret = sd_store->init(obj_path);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	} else
+		dprintf("no store found\n");
 	return ret;
 }
 
@@ -2134,3 +2145,53 @@ int get_cluster_flags(uint16_t *flags)
 out:
 	return ret;
 }
+
+/*
+ * Persist the backend store name (NUL included) in the 'store' field of the
+ * config file.  Names that do not fit in STORE_LEN bytes are rejected.
+ */
+int set_cluster_store(const uint8_t *name)
+{
+	const size_t pos = offsetof(struct sheepdog_config, store);
+	int fd, ret = SD_RES_EIO, len;
+	void *jd;
+
+	len = strlen((char *)name) + 1;
+	if (len > STORE_LEN)	/* would spill past the on-disk field */
+		return SD_RES_INVALID_PARMS;
+
+	fd = open(config_path, O_DSYNC | O_WRONLY);
+	if (fd < 0)
+		return ret;
+
+	/* the write is bracketed by jrnl_begin()/jrnl_end() */
+	jd = jrnl_begin((void *)name, len, pos, config_path, jrnl_path);
+	if (jd) {
+		if (xpwrite(fd, name, len, pos) == len)
+			ret = SD_RES_SUCCESS;
+		jrnl_end(jd);
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * Read the stored backend name into @buf (STORE_LEN bytes).  @buf is zeroed
+ * first and at most STORE_LEN - 1 bytes are read, so it is always a valid C
+ * string; a short read (e.g. a config file without the field) gives "".
+ */
+int get_cluster_store(uint8_t *buf)
+{
+	int fd, ret;
+
+	memset(buf, 0, STORE_LEN);
+	fd = open(config_path, O_RDONLY);
+	if (fd < 0)
+		return SD_RES_EIO;
+
+	ret = pread(fd, buf, STORE_LEN - 1,
+		    offsetof(struct sheepdog_config, store));
+	close(fd);
+
+	return ret == -1 ? SD_RES_EIO : SD_RES_SUCCESS;
+}
-- 
1.7.8.2




More information about the sheepdog mailing list