[Sheepdog] [PATCH v5 02/17] store: add dynamic mechanism to chain the available backend stores.

Liu Yuan namei.unix at gmail.com
Fri Dec 30 14:06:57 CET 2011


From: Liu Yuan <tailai.ly at taobao.com>

 - change the global store structure to a pointer.
 - use a list to maintain the available stores.
 - use /obj/.store to persistently remember the chosen backend store.
 - the backend store can now be specified on the command line, e.g.
	collie cluster format -b farm #use farm
   If no store is specified, sheep currently uses the 'simple' store.
   If the specified store is not available, collie prints a list of
   the available stores.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 collie/cluster.c         |   59 +++++++++++++++++++++++-
 collie/collie.c          |    1 +
 include/sheep.h          |    9 ++-
 include/sheepdog_proto.h |    1 +
 sheep/ops.c              |   30 ++++++++++++
 sheep/sheep_priv.h       |   34 +++++++++++++-
 sheep/simple_store.c     |   10 +---
 sheep/store.c            |  115 +++++++++++++++++++++++++++++++++++----------
 8 files changed, 219 insertions(+), 40 deletions(-)

diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..a97ef79 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -20,6 +20,7 @@ struct cluster_cmd_data {
 	int copies;
 	int nohalt;
 	int force;
+	char argv[10];
 } cluster_cmd_data;
 
 static void set_nohalt(uint16_t *p)
@@ -28,6 +29,53 @@ static void set_nohalt(uint16_t *p)
 		*p |= SD_FLAG_NOHALT;
 }
 
+static int get_store_index(char *name)
+{
+	/* An empty name (no -b option given) selects the default store, index 0. */
+	if (name[0] == '\0' || !strcmp(name, "simple"))
+		return 0;
+	return -1;
+}
+
+/* Ask the sheep at sdhost:sdport for its backend store list and print it. */
+static int list_store(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+	char buf[512] = { 0 };
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+	wlen = 0;
+	/* Leave the last byte zero (assuming exec_req() reads <= rlen bytes). */
+	rlen = sizeof(buf) - 1;
+	hdr.opcode = SD_OP_GET_STORE_LIST;
+	hdr.data_length = rlen;
+
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_FAILURE;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Failed to get store list: %s\n",
+				sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	printf("Available stores:\n");
+	printf("---------------------------------------\n");
+	printf("%s\n", buf);
+	return EXIT_SUCCESS;
+}
+
 static int cluster_format(int argc, char **argv)
 {
 	int fd, ret;
@@ -35,6 +83,9 @@ static int cluster_format(int argc, char **argv)
 	struct sd_so_rsp *rsp = (struct sd_so_rsp *)&hdr;
 	unsigned rlen, wlen;
 	struct timeval tv;
+	uint8_t idx;
+
+	idx = get_store_index(cluster_cmd_data.argv);
 
 	fd = connect_to(sdhost, sdport);
 	if (fd < 0)
@@ -50,6 +101,7 @@ static int cluster_format(int argc, char **argv)
 		set_nohalt(&hdr.flags);
 	hdr.epoch = node_list_version;
 	hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
+	hdr.index = idx;
 
 	rlen = 0;
 	wlen = 0;
@@ -64,7 +116,7 @@ static int cluster_format(int argc, char **argv)
 	if (rsp->result != SD_RES_SUCCESS) {
 		fprintf(stderr, "Format failed: %s\n",
 				sd_strerror(rsp->result));
-		return EXIT_FAILURE;
+		return list_store();
 	}
 
 	return EXIT_SUCCESS;
@@ -237,7 +289,7 @@ static int cluster_recover(int argc, char **argv)
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprh", "show cluster information",
 	 0, cluster_info},
-	{"format", NULL, "cHaph", "create a Sheepdog store",
+	{"format", NULL, "bcHaph", "create a Sheepdog store",
 	 0, cluster_format},
 	{"shutdown", NULL, "aph", "stop Sheepdog",
 	 SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
@@ -252,6 +304,9 @@ static int cluster_parser(int ch, char *opt)
 	char *p;
 
 	switch (ch) {
+	case 'b':
+		strcpy(cluster_cmd_data.argv, opt);
+		break;
 	case 'c':
 		copies = strtol(opt, &p, 10);
 		if (opt == p || copies < 1) {
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..baf7c67 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -40,6 +40,7 @@ static const struct sd_option collie_options[] = {
 	{'d', "delete", 0, "delete a key"},
 
 	/* cluster options */
+	{'b', "store", 1, "specify backend store"},
 	{'c', "copies", 1, "specify the data redundancy (number of copies)"},
 	{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
                           nodes for the configured redundancy"},
diff --git a/include/sheep.h b/include/sheep.h
index 906c1f5..6cd63e7 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -27,7 +27,7 @@
 #define SD_MAX_VNODES 65536
 #define SD_MAX_VMS   4096 /* FIXME: should be removed */
 
-#define SD_OP_SHEEP         0x80
+#define SD_OP_SHEEP          0x80
 #define SD_OP_DEL_VDI        0x81
 #define SD_OP_GET_NODE_LIST  0x82
 #define SD_OP_GET_VM_LIST    0x83
@@ -37,7 +37,8 @@
 #define SD_OP_STAT_CLUSTER   0x87
 #define SD_OP_KILL_NODE      0x88
 #define SD_OP_GET_VDI_ATTR   0x89
-#define SD_OP_RECOVER	     0x8A
+#define SD_OP_RECOVER        0x8a
+#define SD_OP_GET_STORE_LIST 0x90
 
 #define SD_FLAG_CMD_IO_LOCAL   0x0010
 #define SD_FLAG_CMD_RECOVERY 0x0020
@@ -72,7 +73,8 @@ struct sd_so_req {
 	uint64_t	ctime;
 	uint32_t	copies;
 	uint32_t	tag;
-	uint32_t	opcode_specific[2];
+	uint8_t		index;
+	uint8_t		opcode_specific[7];
 };
 
 struct sd_so_rsp {
@@ -263,6 +265,7 @@ static inline const char *sd_strerror(int err)
 		{SD_RES_JOIN_FAILED, "Node has failed to join cluster"},
 		{SD_RES_HALT, "IO has halted as there are too few living nodes"},
 		{SD_RES_MANUAL_RECOVER, "Cluster is running/halted and cannot be manually recovered"},
+		{SD_RES_NO_STORE, "Targeted backend store is not found"},
 
 		{SD_RES_OLD_NODE_VER, "Remote node has an old epoch"},
 		{SD_RES_NEW_NODE_VER, "Remote node has a new epoch"},
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..289502f 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -59,6 +59,7 @@
 #define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
 #define SD_RES_HALT 0x19 /* Sheepdog is stopped doing IO */
 #define SD_RES_MANUAL_RECOVER   0x1A /* Users should not manually recover this cluster */
+#define SD_RES_NO_STORE         0x20 /* No targeted backend store */
 
 /*
  * Object ID rules
diff --git a/sheep/ops.c b/sheep/ops.c
index 13ecdf2..478d41a 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -12,6 +12,10 @@
 #include <stdlib.h>
 
 #include "sheep_priv.h"
+#include "strbuf.h"
+
+extern char *obj_path;
+extern struct store_driver *sd_store;
 
 enum sd_op_type {
 	SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
@@ -132,6 +136,11 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 	int i, latest_epoch, ret;
 	uint64_t ctime;
 
+	sd_store = find_store_driver(index_to_name(hdr->index));
+	if (!sd_store)
+		return SD_RES_NO_STORE;
+
+	sd_store->init(obj_path);
 	sys->nr_sobjs = hdr->copies;
 	sys->flags = hdr->flags;
 	if (!sys->nr_sobjs)
@@ -212,6 +221,21 @@ static int cluster_get_vdi_attr(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
+static int local_get_store_list(const struct sd_req *req, struct sd_rsp *rsp,
+				void *data)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct store_driver *driver;
+
+	list_for_each_entry(driver, &store_drivers, list)
+		strbuf_addf(&buf, "%s ", driver->name);
+	/* Send "name name ..."; clamp in case data only holds req->data_length. */
+	memcpy(data, buf.buf,
+	       buf.len < req->data_length ? buf.len : req->data_length);
+	strbuf_release(&buf);
+	return SD_RES_SUCCESS;
+}
+
 static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
@@ -434,6 +458,12 @@ static struct sd_op_template sd_ops[] = {
 	},
 
 	/* local operations */
+	[SD_OP_GET_STORE_LIST] = {
+		.type = SD_OP_TYPE_LOCAL,
+		.force = 1,
+		.process_work = local_get_store_list,
+	},
+
 	[SD_OP_READ_VDIS] = {
 		.type = SD_OP_TYPE_LOCAL,
 		.force = 1,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 5145f06..bee2cac 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -166,7 +166,8 @@ struct siocb {
 };
 
 struct store_driver {
-	const char *driver_name;
+	struct list_head list;
+	const char *name;
 	int (*init)(char *path);
 	int (*open)(uint64_t oid, struct siocb *, int create);
 	int (*write)(uint64_t oid, struct siocb *);
@@ -178,7 +179,33 @@ struct store_driver {
 	int (*atomic_put)(uint64_t oid, struct siocb *);
 };
 
-extern void register_store_driver(struct store_driver *);
+extern struct list_head store_drivers; /* list of every registered backend store driver */
+#define add_store_driver(driver)                                 \
+static void __attribute__((constructor)) add_ ## driver(void) {  \
+        list_add(&driver.list, &store_drivers);                  \
+} /* constructor attribute: the driver links itself into store_drivers before main() */
+
+static inline struct store_driver *find_store_driver(const char *name)
+{
+	struct store_driver *drv;
+
+	/* Linear scan of the registered drivers; first exact name match wins. */
+	list_for_each_entry(drv, &store_drivers, list)
+		if (!strcmp(drv->name, name))
+			return drv;
+	return NULL;
+}
+
+static inline const char *index_to_name(int idx)
+{
+	/* Wire index -> driver name; mirrors get_store_index() in collie. */
+	static const char *stores[] = { [0] = "simple" };
+
+	/* Anything outside the table yields the placeholder "INVALID". */
+	if (idx >= 0 && (size_t)idx < ARRAY_SIZE(stores))
+		return stores[idx];
+	return "INVALID";
+}
 
 extern struct cluster_info *sys;
 
@@ -242,6 +269,9 @@ int store_write_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_read_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
 
+int store_file_write(void *buffer, size_t len);
+void *store_file_read(void);
+
 #define NR_GW_WORKER_THREAD 4
 #define NR_IO_WORKER_THREAD 4
 
diff --git a/sheep/simple_store.c b/sheep/simple_store.c
index ddaa781..a5711c1 100644
--- a/sheep/simple_store.c
+++ b/sheep/simple_store.c
@@ -239,8 +239,8 @@ out:
 	return ret;
 }
 
-struct store_driver store = {
-	.driver_name = "simple",
+struct store_driver simple_store = {
+	.name = "simple",
 	.init = simple_store_init,
 	.open = simple_store_open,
 	.write = simple_store_write,
@@ -251,8 +251,4 @@ struct store_driver store = {
 	.atomic_put = simple_store_atomic_put,
 };
 
-void register_store_driver(struct store_driver *driver)
-{
-	store = *driver;
-	eprintf("Register %s store driver\n", store.driver_name);
-}
+add_store_driver(simple_store);
diff --git a/sheep/store.c b/sheep/store.c
index 3323fbc..0025252 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -43,7 +43,8 @@ static char *config_path;
 static mode_t def_dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP;
 mode_t def_fmode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
 
-extern struct store_driver store;
+struct store_driver *sd_store; /* backend in use; stays NULL until a driver is selected */
+LIST_HEAD(store_drivers); /* registry that add_store_driver() constructors append to */
 
 static int obj_cmp(const void *oid1, const void *oid2)
 {
@@ -59,7 +60,7 @@ static int obj_cmp(const void *oid1, const void *oid2)
 
 static void get_store_dir(struct strbuf *buf, int epoch)
 {
-	if (!strcmp(store.name, "simple"))
+	if (!strcmp(sd_store->name, "simple"))
 		strbuf_addf(buf, "%s%08u", obj_path, epoch);
 	else /* XXX assume other store doesn't need epoch/obj pattern */
 		strbuf_addf(buf, "%s", obj_path);
@@ -138,7 +139,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
 		iocb.buf = buf;
 		iocb.length = 0;
 		iocb.epoch = i;
-		store.get_objlist(&iocb);
+		sd_store->get_objlist(&iocb);
 		nr = merge_objlist(list, nr, (uint64_t *)iocb.buf, iocb.length);
 	}
 out:
@@ -173,17 +174,17 @@ static int read_copy_from_cluster(struct request *req, uint32_t epoch,
 		if (is_myself(e[n].addr, e[n].port)) {
 			memset(&iocb, 0, sizeof(iocb));
 			iocb.epoch = epoch;
-			ret = store.open(oid, &iocb, 0);
+			ret = sd_store->open(oid, &iocb, 0);
 			if (ret != SD_RES_SUCCESS)
 				continue;
 
 			iocb.buf = buf;
 			iocb.length = SD_DATA_OBJ_SIZE;
 			iocb.offset = 0;
-			ret = store.read(oid, &iocb);
+			ret = sd_store->read(oid, &iocb);
 			if (ret != SD_RES_SUCCESS)
 				continue;
-			store.close(oid, &iocb);
+			sd_store->close(oid, &iocb);
 			goto out;
 		}
 
@@ -426,7 +427,7 @@ out:
 
 int update_epoch_store(uint32_t epoch)
 {
-	if (!strcmp(store.name, "simple")) {
+	if (!strcmp(sd_store->name, "simple")) {
 		char new[1024];
 
 		snprintf(new, sizeof(new), "%s%08u/", obj_path, epoch);
@@ -565,21 +566,21 @@ int store_read_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.flags = hdr->flags;
-	ret = store.open(hdr->oid, &iocb, 0);
+	ret = sd_store->open(hdr->oid, &iocb, 0);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
 	iocb.buf = request->data;
 	iocb.length = hdr->data_length;
 	iocb.offset = hdr->offset;
-	ret = store.read(hdr->oid, &iocb);
+	ret = sd_store->read(hdr->oid, &iocb);
 	if (ret != SD_RES_SUCCESS)
 		goto out;
 
 	rsps->data_length = hdr->data_length;
 	rsps->copies = sys->nr_sobjs;
 out:
-	store.close(hdr->oid, &iocb);
+	sd_store->close(hdr->oid, &iocb);
 	return ret;
 }
 
@@ -604,11 +605,11 @@ static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epo
 			strbuf_release(&buf);
 			return SD_RES_EIO;
 		}
-		ret = store.write(oid, iocb);
+		ret = sd_store->write(oid, iocb);
 		jrnl_end(jd);
 		strbuf_release(&buf);
 	} else
-		ret = store.write(oid, iocb);
+		ret = sd_store->write(oid, iocb);
 
 	return ret;
 }
@@ -624,13 +625,13 @@ int store_write_obj(const struct sd_req *req, struct sd_rsp *rsp, void *data)
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.flags = hdr->flags;
-	ret = store.open(hdr->oid, &iocb, 0);
+	ret = sd_store->open(hdr->oid, &iocb, 0);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
 	ret = do_write_obj(&iocb, hdr, epoch, request->data);
 
-	store.close(hdr->oid, &iocb);
+	sd_store->close(hdr->oid, &iocb);
 	return ret;
 }
 
@@ -651,7 +652,7 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.flags = hdr->flags;
-	ret = store.open(hdr->oid, &iocb, 1);
+	ret = sd_store->open(hdr->oid, &iocb, 1);
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 	if (hdr->flags & SD_FLAG_CMD_COW) {
@@ -666,14 +667,14 @@ int store_create_and_write_obj(const struct sd_req *req, struct sd_rsp *rsp, voi
 		iocb.buf = buf;
 		iocb.length = SD_DATA_OBJ_SIZE;
 		iocb.offset = 0;
-		ret = store.write(hdr->oid, &iocb);
+		ret = sd_store->write(hdr->oid, &iocb);
 		if (ret != SD_RES_SUCCESS)
 			goto out;
 	}
 	ret = do_write_obj(&iocb, hdr, epoch, request->data);
 out:
 	free(buf);
-	store.close(hdr->oid, &iocb);
+	sd_store->close(hdr->oid, &iocb);
 	return ret;
 }
 
@@ -1228,7 +1229,7 @@ static int recover_object_from_replica(uint64_t oid,
 
 	if (is_myself(entry->addr, entry->port)) {
 		iocb.epoch = epoch;
-		ret = store.link(oid, &iocb, tgt_epoch);
+		ret = sd_store->link(oid, &iocb, tgt_epoch);
 		if (ret == SD_RES_SUCCESS) {
 			ret = 0;
 			goto done;
@@ -1277,7 +1278,7 @@ static int recover_object_from_replica(uint64_t oid,
 		iocb.epoch = epoch;
 		iocb.length = rlen;
 		iocb.buf = buf;
-		ret = store.atomic_put(oid, &iocb);
+		ret = sd_store->atomic_put(oid, &iocb);
 		if (ret!= SD_RES_SUCCESS) {
 			ret = -1;
 			goto out;
@@ -1410,9 +1411,9 @@ static void recover_object(struct work *work)
 	eprintf("done:%"PRIu32" count:%"PRIu32", oid:%"PRIx64"\n", rw->done, rw->count, oid);
 
 	iocb.epoch = epoch;
-	ret = store.open(oid, &iocb, 0);
+	ret = sd_store->open(oid, &iocb, 0);
 	if (ret == SD_RES_SUCCESS) {
-		store.close(oid, &iocb);
+		sd_store->close(oid, &iocb);
 		dprintf("the object is already recovered\n");
 		return;
 	}
@@ -1494,10 +1495,10 @@ int is_recoverying_oid(uint64_t oid)
 
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = sys->epoch;
-	ret = store.open(oid, &iocb, 0);
+	ret = sd_store->open(oid, &iocb, 0);
 	if (ret == SD_RES_SUCCESS) {
 		dprintf("the object %" PRIx64 " is already recoverd\n", oid);
-		store.close(oid, &iocb);
+		sd_store->close(oid, &iocb);
 		return 0;
 	}
 
@@ -1993,6 +1994,7 @@ static int init_config_path(const char *base_path)
 int init_store(const char *d)
 {
 	int ret;
+	char *driver_name;
 
 	ret = init_obj_path(d);
 	if (ret)
@@ -2014,10 +2016,15 @@ int init_store(const char *d)
 	if (ret)
 		return ret;
 
-	ret = store.init(obj_path);
-	if (ret)
-		return ret;
+	driver_name = (char *)store_file_read();
+	if (driver_name)
+		sd_store = find_store_driver(driver_name);
 
+	if (sd_store) {
+		ret = sd_store->init(obj_path);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
 	return ret;
 }
 
@@ -2134,3 +2141,59 @@ int get_cluster_flags(uint16_t *flags)
 out:
 	return ret;
 }
+
+/* Read the ".store" file under obj_path; returns a heap string or NULL. */
+void *store_file_read(void)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct stat st;
+	char *buffer = NULL;
+	int len, fd;
+
+	strbuf_addf(&buf, "%s%s", obj_path, ".store");
+	fd = open(buf.buf, O_RDONLY);
+	if (fd < 0) {
+		dprintf("not found .store file\n");
+		goto out;
+	}
+	if (fstat(fd, &st) < 0) {
+		dprintf("%m\n");
+		goto out_close;
+	}
+
+	len = st.st_size;
+	/* One spare byte: init_store() strcmp()s the contents as a driver name. */
+	buffer = xmalloc(len + 1);
+	buffer[len] = '\0';
+	if (xread(fd, buffer, len) != len) {
+		free(buffer);
+		buffer = NULL;
+	}
+out_close:
+	close(fd);
+out:
+	strbuf_release(&buf);
+	return buffer;
+}
+
+/* Overwrite the ".store" file under obj_path; returns bytes written or -1. */
+int store_file_write(void *buffer, size_t len)
+{
+	struct strbuf path = STRBUF_INIT;
+	int fd, ret = -1;
+
+	strbuf_addf(&path, "%s%s", obj_path, ".store");
+	fd = open(path.buf, O_WRONLY | O_TRUNC | O_CREAT, def_fmode);
+	if (fd < 0) {
+		dprintf("%m\n");
+		goto out;
+	}
+
+	ret = xwrite(fd, buffer, len);
+	if (ret != len)
+		ret = -1;
+	close(fd);
+out:
+	strbuf_release(&path);
+	return ret;
+}
-- 
1.7.8.rc3




More information about the sheepdog mailing list