[sheepdog] [PATCH] sheep, dog: add vnodes fixed options.

Saeki Masaki saeki.masaki at po.ntts.co.jp
Fri Dec 19 02:07:23 CET 2014


In the current sheepdog, the number of vnodes is recalculated every time
a node is added to or removed from the cluster.

During auto recovery, a node first gets objects from the other nodes
and only at the end deletes the objects it no longer needs.
So while auto recovery is running, the available disk space decreases.
In the worst case, it exhausts the available disk space.

Add the following new commands and options.
1. option to specify vnodes in sheep. (-V, --vnodes)
  - In the old days this was implemented as the -v (--vnodes) option.
  - Now -v is used to print the version, so it is added as -V (capital).
$ sheep -V 100 /var/lib/sheepdog
  If -V is specified, the vnodes strategy of sheep is 'fixed'
  (the default value is 'auto')

2. option to format the cluster with fixed vnodes in dog. (-V, --fixedvnodes)
  $ dog cluster format -V
  If sheep with the 'fixed' and the 'auto' vnodes strategy are mixed,
  the cluster format command fails.
  (sheep with different vnodes strategies can not be mixed in a cluster)

3. dog command to change the vnodes
  $ dog node vnodes set <vnodes>
  After the vnodes are changed, a new epoch is created and auto recovery starts.

If you operate a cluster with fixed vnodes,
you have to manage the vnodes of each node yourself,
according to the capacity of its data store.
So you should use this option carefully.

Example of using the fixed vnodes strategy:

1) start sheep with the fixed vnodes strategy (one sheep on each host).
$ sheep -V 100 /var/lib/sheepdog
$ sheep -V 120 /var/lib/sheepdog
$ dog node list
  Id   Host:Port         V-Nodes       Zone
   0   172.16.4.205:7000        100 1812140204
   1   172.16.4.206:7000        120 1828917420

2) format the cluster with fixed vnodes strategy.
$ dog cluster format -V

3) check vnodes strategy of cluster.
$ dog cluster info -v
Cluster status: running, auto-recovery enabled
Cluster store: plain with 3 redundancy policy
Cluster vnodes strategy: fixed
Cluster vnode mode: node
Cluster created at Wed Dec 17 18:20:10 2014

Epoch Time           Version [Host:Port:V-Nodes,,,]
2014-12-17 18:20:10      1 [172.16.4.205:7000:100, 172.16.4.206:7000:120]

4) change the number of vnodes of a node.
$ dog node vnodes set 140
$ dog node list
  Id   Host:Port         V-Nodes       Zone
   0   172.16.4.205:7000        140 1812140204
   1   172.16.4.206:7000        120 1828917420

Signed-off-by: Masaki Saeki <masaki.saeki at po.ntts.co.jp>

---
 dog/cluster.c            |   82 ++++++++++++++++++++++++++++++++++++++--------
 dog/node.c               |   67 +++++++++++++++++++++++++++++++++++++
 include/internal_proto.h |    3 ++
 include/sheep.h          |    8 ++++
 include/sheepdog_proto.h |    2 +
 sheep/config.c           |   15 ++++++++-
 sheep/group.c            |   62 ++++++++++++++++++++++++++++++++---
 sheep/ops.c              |   82 ++++++++++++++++++++++++++++++++++++++++++++++
 sheep/sheep.c            |   31 ++++++++++++++++-
 9 files changed, 330 insertions(+), 22 deletions(-)

diff --git a/dog/cluster.c b/dog/cluster.c
index 20f190b..c92141e 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -15,6 +15,7 @@
 #include <sys/time.h>
 
 #include "dog.h"
+#include "sheep.h"
 #include "farm/farm.h"
 
 static struct sd_option cluster_options[] = {
@@ -27,6 +28,7 @@ static struct sd_option cluster_options[] = {
 	 "do not serve write request if number of nodes is not sufficient"},
 	{'z', "block_size_shift", true, "specify the shift num of default"
 	      " data object size"},
+	{'V', "fixedvnodes", false, "disable automatic vnodes calculation"},
 	{ 0, NULL, false, NULL },
 };
 
@@ -38,6 +40,7 @@ static struct cluster_cmd_data {
 	bool force;
 	bool strict;
 	char name[STORE_LEN];
+	bool fixed_vnodes;
 } cluster_cmd_data;
 
 #define DEFAULT_STORE	"plain"
@@ -87,6 +90,41 @@ static int cluster_format(int argc, char **argv)
 	struct timeval tv;
 	char store_name[STORE_LEN];
 	static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
+	struct sd_node *n;
+
+	rb_for_each_entry(n, &sd_nroot, rb) {
+		struct sd_req info_req;
+		struct sd_rsp *info_rsp = (struct sd_rsp *)&info_req;
+		struct cluster_info cinfo;
+
+		sd_init_req(&info_req, SD_OP_CLUSTER_INFO);
+		info_req.data_length = sizeof(cinfo);
+		ret = dog_exec_req(&n->nid, &info_req, &cinfo);
+		if (ret < 0) {
+			sd_err("Fail to execute request");
+			return EXIT_FAILURE;
+		}
+		if (info_rsp->result != SD_RES_SUCCESS) {
+			sd_err("%s", sd_strerror(info_rsp->result));
+			return EXIT_FAILURE;
+		}
+
+		if (n->nr_vnodes != 0) {
+			if ((cinfo.flags & SD_CLUSTER_FLAG_AUTO_VNODES)
+				&& cluster_cmd_data.fixed_vnodes) {
+				sd_err("Can not apply the option of '-V', "
+					"because there are vnode strategy of sheep "
+					"is auto in the cluster");
+				return EXIT_FAILURE;
+			} else if (!(cinfo.flags & SD_CLUSTER_FLAG_AUTO_VNODES)
+				&& !cluster_cmd_data.fixed_vnodes) {
+				sd_err("Need to specify the option of '-V', "
+					"because there are vnode strategy of sheep "
+					"is fixed in the cluster");
+				return EXIT_FAILURE;
+			}
+		}
+	}
 
 	if (cluster_cmd_data.copies > sd_nodes_nr) {
 		char info[1024];
@@ -132,6 +170,11 @@ static int cluster_format(int argc, char **argv)
 	hdr.cluster.flags |= SD_CLUSTER_FLAG_DISKMODE;
 #endif
 
+	if (cluster_cmd_data.fixed_vnodes)
+		hdr.cluster.flags &= ~SD_CLUSTER_FLAG_AUTO_VNODES;
+	else
+		hdr.cluster.flags |= SD_CLUSTER_FLAG_AUTO_VNODES;
+
 	printf("using backend %s store\n", store_name);
 	ret = dog_exec_req(&sd_nid, &hdr, store_name);
 	if (ret < 0)
@@ -160,14 +203,15 @@ static void print_nodes(const struct epoch_log *logs, uint16_t flags)
 				if (entry->disks[nr_disk].disk_id == 0)
 					break;
 			}
-			printf("%s%s(%d)",
-			       (i == 0) ? "" : ", ",
-			       addr_to_str(entry->nid.addr, entry->nid.port),
-			       nr_disk);
+			printf("%s%s:%d(%d)",
+				(i == 0) ? "" : ", ",
+				addr_to_str(entry->nid.addr, entry->nid.port),
+					entry->nr_vnodes, nr_disk);
 		} else
-			printf("%s%s",
-			       (i == 0) ? "" : ", ",
-			       addr_to_str(entry->nid.addr, entry->nid.port));
+			printf("%s%s:%d",
+				(i == 0) ? "" : ", ",
+				addr_to_str(entry->nid.addr, entry->nid.port),
+					entry->nr_vnodes);
 	}
 }
 
@@ -232,6 +276,15 @@ retry:
 			}
 			printf("%s with %s redundancy policy\n",
 			       logs->drv_name, copy);
+
+			/* show vnode strategy */
+			if (!raw_output)
+				printf("Cluster vnodes strategy: ");
+			if (logs->flags & SD_CLUSTER_FLAG_AUTO_VNODES)
+				printf("auto\n");
+			else
+				printf("fixed\n");
+
 		} else
 			printf("%s\n", sd_strerror(rsp->result));
 
@@ -239,15 +292,16 @@ retry:
 		if (!raw_output)
 			printf("Cluster vnode mode: ");
 		if (logs->flags & SD_CLUSTER_FLAG_DISKMODE)
-			printf("disk");
+			printf("disk\n");
 		else
-			printf("node");
+			printf("node\n");
 	}
 
 	if (!raw_output && rsp->data_length > 0) {
 		ct = logs[0].ctime >> 32;
-		printf("\nCluster created at %s\n", ctime(&ct));
-		printf("Epoch Time           Version\n");
+		printf("Cluster created at %s\n", ctime(&ct));
+		printf("Epoch Time           Version [Host:Port:V-Nodes,,,]");
+		printf("\n");
 	}
 
 	nr_logs = rsp->data_length / (sizeof(struct epoch_log)
@@ -761,7 +815,7 @@ failure:
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprhvT", "show cluster information",
 	 NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},
-	{"format", NULL, "bctaphzT", "create a Sheepdog store",
+	{"format", NULL, "bctaphzTV", "create a Sheepdog store",
 	 NULL, CMD_NEED_NODELIST, cluster_format, cluster_options},
 	{"shutdown", NULL, "aphT", "stop Sheepdog",
 	 NULL, 0, cluster_shutdown, cluster_options},
@@ -823,9 +877,11 @@ static int cluster_parser(int ch, const char *opt)
 			" Please set shift bit larger than 20");
 			exit(EXIT_FAILURE);
 		}
-
 		cluster_cmd_data.block_size_shift = block_size_shift;
-
+		/* end this case here so that -z does not also enable -V */
+		break;
+	case 'V':
+		cluster_cmd_data.fixed_vnodes = true;
 		break;
 	}
 
diff --git a/dog/node.c b/dog/node.c
index a4e9142..b9d441a 100644
--- a/dog/node.c
+++ b/dog/node.c
@@ -625,6 +625,85 @@ static int node_log(int argc, char **argv)
 	return do_generic_subcommand(node_log_cmd, argc, argv);
 }
 
+/*
+ * Send SD_OP_SET_VNODES to the sheep at @nid with *nr_vnodes as payload.
+ *
+ * Returns EXIT_SYSFAIL when dog_exec_req() fails, EXIT_FAILURE when the
+ * sheep answers with a non-success result, else dog_exec_req()'s value.
+ */
+static int do_vnodes_set(const struct node_id *nid, int *nr_vnodes)
+{
+	int ret = 0;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+
+	sd_init_req(&hdr, SD_OP_SET_VNODES);
+	hdr.flags = SD_FLAG_CMD_WRITE;
+	/* the payload is the integer itself, not the pointer to it */
+	hdr.data_length = sizeof(*nr_vnodes);
+
+	ret = dog_exec_req(nid, &hdr, nr_vnodes);
+	if (ret < 0)
+		return EXIT_SYSFAIL;
+
+	if (rsp->result != SD_RES_SUCCESS)
+		return EXIT_FAILURE;
+
+	return ret;
+}
+
+/*
+ * "dog node vnodes set <num>": validate <num> (1..UINT16_MAX) and send it
+ * to the sheep addressed by sd_nid.  Returns 0 on success, -1 on failure.
+ */
+static int node_vnodes_set(int argc, char **argv)
+{
+	int ret = 0;
+	char *p;
+	int nr_vnodes;
+	long val = strtol(argv[optind], &p, 10);
+
+	/* check the range on the long, before narrowing it to int */
+	if (argv[optind] == p || val < 1 || val > UINT16_MAX
+		|| *p != '\0') {
+		sd_err("Invalid number of vnodes '%s': must be an integer "
+			"between 1 and %u",
+			argv[optind], UINT16_MAX);
+		exit(EXIT_USAGE);
+	}
+	nr_vnodes = val;
+
+	ret = do_vnodes_set(&sd_nid, &nr_vnodes);
+
+	switch (ret) {
+	case EXIT_FAILURE:
+	case EXIT_SYSFAIL:
+		sd_err("Failed to execute request");
+		ret = -1;
+		break;
+	case EXIT_SUCCESS:
+		/* do nothing */
+		break;
+	default:
+		sd_err("unknown return code of do_vnodes_set(): %d", ret);
+		ret = -1;
+		break;
+	}
+
+	return ret;
+}
+
+static struct subcommand node_vnodes_cmd[] = {
+	{"set", "<num of vnodes>", NULL, "set new vnodes",
+	 NULL, CMD_NEED_ARG, node_vnodes_set},
+	{NULL},
+};
+
+static int node_vnodes(int argc, char **argv)
+{
+	return do_generic_subcommand(node_vnodes_cmd, argc, argv);
+}
+
 static struct subcommand node_cmd[] = {
 	{"kill", "<node id>", "aprhlT", "kill node", NULL,
 	 CMD_NEED_NODELIST, node_kill, node_options},
@@ -640,6 +705,8 @@ static struct subcommand node_cmd[] = {
 	 0, node_stat, node_options},
 	{"log", NULL, "aphT", "show or set log level of the node", node_log_cmd,
 	 CMD_NEED_ARG, node_log},
+	{"vnodes", "<num of vnodes>", "aph", "set new vnodes", node_vnodes_cmd,
+	 CMD_NEED_ARG, node_vnodes},
 	{NULL,},
 };
 
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 3f5d77f..f280d6d 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -111,6 +111,8 @@
 #define SD_OP_VDI_STATE_SNAPSHOT_CTL  0xC7
 #define SD_OP_INODE_COHERENCE 0xC8
 #define SD_OP_READ_DEL_VDIS  0xC9
+#define SD_OP_SET_VNODES 0xCC
+#define SD_OP_GET_VNODES 0xCD
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
@@ -143,6 +145,7 @@
 
 #define SD_CLUSTER_FLAG_STRICT		0x0001 /* Strict mode for write */
 #define SD_CLUSTER_FLAG_DISKMODE	0x0002 /* Disk mode for cluster */
+#define SD_CLUSTER_FLAG_AUTO_VNODES	0x0004 /* Cluster vnodes strategy */
 
 enum sd_status {
 	SD_STATUS_OK = 1,
diff --git a/include/sheep.h b/include/sheep.h
index 22524c1..fe6f066 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -149,6 +149,9 @@ static inline const char *sd_strerror(int err)
 			"IO has halted as there are not enough living nodes",
 		[SD_RES_READONLY] = "Object is read-only",
 		[SD_RES_INODE_INVALIDATED] = "Inode object is invalidated",
+		[SD_RES_INVALID_VNODES_STRATEGY] =
+			"Invalid cluster vnodes strategy",
+		[SD_RES_GATEWAY_MODE] = "Targeted node is gateway mode",
 
 		/* from internal_proto.h */
 		[SD_RES_OLD_NODE_VER] = "Request has an old epoch",
@@ -328,4 +331,9 @@ static inline bool is_cluster_diskmode(const struct cluster_info *cinfo)
 	return (cinfo->flags & SD_CLUSTER_FLAG_DISKMODE) > 0;
 }
 
+static inline bool is_cluster_autovnodes(const struct cluster_info *cinfo)
+{
+	return (cinfo->flags & SD_CLUSTER_FLAG_AUTO_VNODES) > 0;
+}
+
 #endif
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 4f0c48c..28ededd 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -86,6 +86,8 @@
 #define SD_RES_INCOMPLETE    0x1B /* Object (in kv) is incomplete uploading */
 #define SD_RES_COLLECTING_CINFO 0x1C /* sheep is collecting cluster wide status, not ready for operation */
 #define SD_RES_INODE_INVALIDATED 0x1D /* inode object in client is invalidated, refreshing is required */
+#define SD_RES_GATEWAY_MODE  0x1E /* Target node is gateway mode */
+#define SD_RES_INVALID_VNODES_STRATEGY 0x1F /* Invalid vnodes strategy */
 
 /* errors above 0x80 are sheepdog-internal */
 
diff --git a/sheep/config.c b/sheep/config.c
index 383a1ed..4a1e600 100644
--- a/sheep/config.c
+++ b/sheep/config.c
@@ -62,7 +62,12 @@ static int get_cluster_config(struct cluster_info *cinfo)
 {
 	cinfo->ctime = config.ctime;
 	cinfo->nr_copies = config.copies;
-	cinfo->flags = config.flags;
+	if (config.ctime > 0) {
+		cinfo->flags = config.flags;
+	} else {
+		cinfo->flags = (config.flags & ~SD_CLUSTER_FLAG_AUTO_VNODES) |
+			(cinfo->flags & SD_CLUSTER_FLAG_AUTO_VNODES);
+	}
 	cinfo->copy_policy = config.copy_policy;
 	memcpy(cinfo->store, config.store, sizeof(config.store));
 
@@ -121,6 +126,14 @@ int init_config_file(void)
 	}
 
 reload:
+	if ((config.flags & SD_CLUSTER_FLAG_AUTO_VNODES) !=
+			(sys->cinfo.flags & SD_CLUSTER_FLAG_AUTO_VNODES)
+		&& !sys->gateway_only
+		&& config.ctime > 0) {
+		sd_err("Designation of before a restart and a vnodes option is different.");
+		return -1;
+	}
+
 	ret = 0;
 	get_cluster_config(&sys->cinfo);
 	if ((config.flags & SD_CLUSTER_FLAG_DISKMODE) !=
diff --git a/sheep/group.c b/sheep/group.c
index 095b7c5..b33e514 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -145,7 +145,8 @@ struct vnode_info *alloc_vnode_info(const struct rb_root *nroot)
 		vnode_info->nr_nodes++;
 	}
 
-	recalculate_vnodes(&vnode_info->nroot);
+	if (is_cluster_autovnodes(&sys->cinfo))
+		recalculate_vnodes(&vnode_info->nroot);
 
 	if (is_cluster_diskmode(&sys->cinfo))
 		disks_to_vnodes(&vnode_info->nroot, &vnode_info->vroot);
@@ -1098,6 +1099,20 @@ static bool cluster_join_check(const struct cluster_info *cinfo)
 	if (!cluster_ctime_check(cinfo))
 		return false;
 
+	if (cinfo->ctime > 0 && sys->this_node.nr_vnodes != 0) {
+		if (!is_cluster_autovnodes(&sys->cinfo)
+			&& is_cluster_autovnodes(cinfo)) {
+			sd_err("failed to join for vnodes strategy unmatch. "
+				" cluster:fixed, joined:auto");
+			return false;
+		} else if (is_cluster_autovnodes(&sys->cinfo)
+			&& !is_cluster_autovnodes(cinfo)) {
+			sd_err("failed to join for vnodes strategy unmatch. "
+				" cluster:auto, joined:fixed");
+			return false;
+		}
+	}
+
 	/*
 	 * Sheepdog's recovery code assumes every node have the same epoch
 	 * history. But we don't check epoch history of joining node because:
@@ -1119,6 +1134,14 @@ main_fn void sd_accept_handler(const struct sd_node *joined,
 {
 	const struct cluster_info *cinfo = opaque;
 	struct sd_node *n;
+	uint16_t flags;
+
+	if (node_is_local(joined) && sys->gateway_only
+		&& sys->cinfo.ctime <= 0) {
+		flags = cinfo->flags & SD_CLUSTER_FLAG_AUTO_VNODES;
+	} else {
+		flags = sys->cinfo.flags & SD_CLUSTER_FLAG_AUTO_VNODES;
+	}
 
 	if (node_is_local(joined) && !cluster_join_check(cinfo)) {
 		sd_err("failed to join Sheepdog");
@@ -1127,6 +1150,9 @@ main_fn void sd_accept_handler(const struct sd_node *joined,
 
 	cluster_info_copy(&sys->cinfo, cinfo);
 
+	sys->cinfo.flags &= ~SD_CLUSTER_FLAG_AUTO_VNODES;
+	sys->cinfo.flags |= flags;
+
 	sd_debug("join %s", node_to_str(joined));
 	rb_for_each_entry(n, nroot, rb) {
 		sd_debug("%s", node_to_str(n));
@@ -1191,7 +1217,7 @@ main_fn void sd_leave_handler(const struct sd_node *left,
 	remove_node_from_participants(&left->nid);
 }
 
-static void update_node_size(struct sd_node *node)
+static void update_node_info(struct sd_node *node)
 {
 	struct vnode_info *cur_vinfo = get_vnode_info();
 	struct sd_node *n = rb_search(&cur_vinfo->nroot, node, rb, node_cmp);
@@ -1199,6 +1225,11 @@ static void update_node_size(struct sd_node *node)
 	if (unlikely(!n))
 		panic("can't find %s", node_to_str(node));
 	n->space = node->space;
+
+	if (!is_cluster_autovnodes(&sys->cinfo)) {
+		n->nr_vnodes = node->nr_vnodes;
+	}
+
 	if (is_cluster_diskmode(&sys->cinfo)) {
 		memset(n->disks, 0, sizeof(struct disk_info) * DISK_MAX);
 		for (int i = 0; i < DISK_MAX; i++)
@@ -1227,14 +1258,14 @@ static void kick_node_recover(void)
 
 main_fn void sd_update_node_handler(struct sd_node *node)
 {
-	update_node_size(node);
+	update_node_info(node);
 	kick_node_recover();
 }
 
 int create_cluster(int port, int64_t zone, int nr_vnodes,
 		   bool explicit_addr)
 {
-	int nr_nodes = 0, ret;
+	int nr_nodes = 0, ret, i, vnodes = 0;
 
 	if (!sys->cdrv) {
 		sys->cdrv = find_cdrv(DEFAULT_CLUSTER_DRIVER);
@@ -1270,11 +1301,32 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
 	sys->cinfo.epoch = get_latest_epoch();
 	if (sys->cinfo.epoch) {
 		ret = epoch_log_read(sys->cinfo.epoch, sys->cinfo.nodes,
-				sizeof(sys->cinfo.nodes), &nr_nodes);
+			sizeof(sys->cinfo.nodes), &nr_nodes);
 		if (ret != SD_RES_SUCCESS)
 			return -1;
 		sys->cinfo.nr_nodes = nr_nodes;
 	}
+
+	if (!is_cluster_autovnodes(&sys->cinfo)) {
+		/*
+		 * Find this node in the node list of the latest epoch.
+		 * Match on the node itself: the char pointers returned by
+		 * addr_to_str() must not be compared with '=='.
+		 */
+		for (i = 0; i < nr_nodes; i++) {
+			if (node_is_local(&sys->cinfo.nodes[i])) {
+				vnodes = sys->cinfo.nodes[i].nr_vnodes;
+				break;
+			}
+		}
+		if (sys->cinfo.epoch != 0 && sys->this_node.nr_vnodes != vnodes
+			&& !sys->gateway_only) {
+			sd_err("mismatch specified vnodes is compared with the previous. "
+				"previous vnodes:%d", vnodes);
+			return -1;
+		}
+	}
+
 	sys->cinfo.status = SD_STATUS_WAIT;
 
 	main_thread_set(pending_block_list,
diff --git a/sheep/ops.c b/sheep/ops.c
index 448fd8e..62d216e 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -264,6 +264,29 @@ static int remove_epoch(uint32_t epoch)
 	return SD_RES_SUCCESS;
 }
 
+static int get_vnodes(struct vnode_info *vinfo, int *nr_vnodes)
+{
+	int ret;
+	struct sd_node *node;
+
+	rb_for_each_entry(node, &vinfo->nroot, rb) {
+		struct sd_req hdr;
+		if (node_is_local(node))
+			continue;
+		if (node->nr_vnodes == 0)
+			continue;
+
+		sd_init_req(&hdr, SD_OP_GET_VNODES);
+		hdr.data_length = sizeof(*nr_vnodes);
+		hdr.epoch = sys_epoch();
+		ret = sheep_exec_req(&node->nid, &hdr, nr_vnodes);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+		node->nr_vnodes = *nr_vnodes;
+	}
+	return SD_RES_SUCCESS;
+}
+
 static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data, const struct sd_node *sender)
 {
@@ -271,6 +294,8 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 	uint32_t latest_epoch;
 	struct store_driver *driver;
 	char *store_name = data;
+	int32_t nr_vnodes;
+	struct vnode_info *vinfo = get_vnode_info();
 
 	driver = find_store_driver(data);
 	if (!driver)
@@ -289,6 +314,12 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
+	if (sys->gateway_only) {
+		ret = get_vnodes(vinfo, &nr_vnodes);
+		if (ret != SD_RES_SUCCESS)
+			return ret;
+	}
+
 	sys->cinfo.nr_copies = req->cluster.copies;
 	sys->cinfo.copy_policy = req->cluster.copy_policy;
 	sys->cinfo.block_size_shift = req->cluster.block_size_shift;
@@ -1441,6 +1472,65 @@ static int cluster_inode_coherence(const struct sd_req *req,
 			       !!req->inode_coherence.validate, &sender->nid);
 }
 
+/*
+ * SD_OP_GET_VNODES handler: store this node's number of vnodes in the
+ * request's data buffer and set the response length accordingly.
+ */
+static int local_get_vnodes(struct request *req)
+{
+	int *nr_vnodes;
+
+	nr_vnodes = req->data;
+	/* length of the integer stored below, not of the pointer to it */
+	req->rp.data_length = sizeof(*nr_vnodes);
+	*nr_vnodes = sys->this_node.nr_vnodes;
+
+	return SD_RES_SUCCESS;
+}
+
+/*
+ * SD_OP_SET_VNODES handler: set this node's number of vnodes to the int
+ * carried in @data and pass the updated node to the cluster driver.
+ *
+ * Fails with SD_RES_GATEWAY_MODE on a gateway-only node, with
+ * SD_RES_INVALID_VNODES_STRATEGY when the cluster strategy is auto, and
+ * with SD_RES_INVALID_PARMS when the payload is too short or the value is
+ * outside 1..UINT16_MAX.  Otherwise returns the result of update_node().
+ */
+static int local_set_vnodes(const struct sd_req *req,
+				struct sd_rsp *rsp, void *data,
+				const struct sd_node *sender)
+{
+	int ret;
+	int *nr_vnodes = (int *)data;
+
+	if (sys->gateway_only) {
+		sd_err("failed to set vnodes, cause operating in gateway mode.");
+		return SD_RES_GATEWAY_MODE;
+	}
+	if (is_cluster_autovnodes(&sys->cinfo)) {
+		sd_err("failed to set vnodes, cause operating in auto vnodes strategy.");
+		return SD_RES_INVALID_VNODES_STRATEGY;
+	}
+
+	/* do not read the value unless the request really carries one */
+	if (req->data_length < sizeof(*nr_vnodes)) {
+		sd_err("invalid vnodes request: payload too short");
+		return SD_RES_INVALID_PARMS;
+	}
+
+	if (1 > *nr_vnodes || *nr_vnodes > UINT16_MAX) {
+		sd_err("invalid vnodes: %d", *nr_vnodes);
+		return SD_RES_INVALID_PARMS;
+	}
+
+	sys->this_node.nr_vnodes = *nr_vnodes;
+
+	ret = sys->cdrv->update_node(&sys->this_node);
+
+	return ret;
+}
+
 static struct sd_op_template sd_ops[] = {
 
 	/* cluster operations */
@@ -1827,6 +1897,18 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = local_vdi_state_snapshot_ctl,
 	},
 
+	[SD_OP_GET_VNODES] = {
+		.name = "GET_VNODES",
+		.type = SD_OP_TYPE_LOCAL,
+		.process_work = local_get_vnodes,
+	},
+
+	[SD_OP_SET_VNODES] = {
+		.name = "SET_VNODES",
+		.type = SD_OP_TYPE_LOCAL,
+		.process_main = local_set_vnodes,
+	},
+
 	/* gateway I/O operations */
 	[SD_OP_CREATE_AND_WRITE_OBJ] = {
 		.name = "CREATE_AND_WRITE_OBJ",
diff --git a/sheep/sheep.c b/sheep/sheep.c
index ef45a33..9d4b7a2 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -115,6 +115,10 @@ static const char log_help[] =
 "  syslog             syslog of the system\n"
 "  stdout             standard output\n";
 
+static const char vnodes_help[] =
+"Example:\n\t$ sheep -V 128\n"
+"\tset number of vnodes\n";
+
 static struct sd_option sheep_options[] = {
 	{'b', "bindaddr", true, "specify IP address of interface to listen on",
 	 bind_help},
@@ -139,6 +143,7 @@ static struct sd_option sheep_options[] = {
 	 http_help},
 	{'u', "upgrade", false, "upgrade to the latest data layout"},
 	{'v', "version", false, "show the version"},
+	{'V', "vnodes", true, "set number of vnodes", vnodes_help},
 	{'w', "cache", true, "enable object cache", cache_help},
 	{'y', "myaddr", true, "specify the address advertised to other sheep",
 	 myaddr_help},
@@ -618,11 +623,12 @@ static void sighup_handler(int signum)
 int main(int argc, char **argv)
 {
 	int ch, longindex, ret, port = SD_LISTEN_PORT, io_port = SD_LISTEN_PORT;
-	int nr_vnodes = SD_DEFAULT_VNODES, rc = 1;
+	int rc = 1;
 	const char *dirp = DEFAULT_OBJECT_DIR, *short_options;
 	char *dir, *p, *pid_file = NULL, *bindaddr = NULL, log_path[PATH_MAX],
 	     *argp = NULL;
 	bool explicit_addr = false;
+	int32_t nr_vnodes = -1;
 	int64_t zone = -1;
 	struct cluster_driver *cdrv;
 	struct option *long_options;
@@ -631,6 +637,7 @@ int main(int argc, char **argv)
 	struct stat logdir_st;
 	enum log_dst_type log_dst_type;
 
+	sys->cinfo.flags |= SD_CLUSTER_FLAG_AUTO_VNODES;
 	sys->node_status = SD_NODE_STATUS_INITIALIZATION;
 
 	install_crash_handler(crash_handler);
@@ -675,7 +682,10 @@ int main(int argc, char **argv)
 			sys->backend_dio = true;
 			break;
 		case 'g':
-			/* same as '-v 0' */
+			if (nr_vnodes > 0) {
+				sd_err("Options '-g' and '-V' can not be both specified");
+				exit(1);
+			}
 			nr_vnodes = 0;
 			break;
 		case 'z':
@@ -756,6 +766,21 @@ int main(int argc, char **argv)
 				PACKAGE_VERSION);
 			exit(0);
 			break;
+		case 'V':
+			sys->cinfo.flags &= ~SD_CLUSTER_FLAG_AUTO_VNODES;
+			if (nr_vnodes == 0) {
+				sd_err("Options '-g' and '-V' can not be both specified");
+				exit(1);
+			}
+			nr_vnodes = strtol(optarg, &p, 10);
+			if (optarg == p || nr_vnodes < 1
+				|| UINT16_MAX < nr_vnodes || *p != '\0') {
+				sd_err("Invalid number of vnodes '%s': must be "
+					"an integer between 1 and %u",
+					optarg, UINT16_MAX);
+				exit(1);
+			}
+			break;
 		default:
 			usage(1);
 			break;
@@ -772,6 +797,8 @@ int main(int argc, char **argv)
 	if (nr_vnodes == 0) {
 		sys->gateway_only = true;
 		sys->disk_space = 0;
+	} else if (nr_vnodes == -1) {
+		nr_vnodes = SD_DEFAULT_VNODES;
 	}
 
 	if (optind != argc) {
-- 
1.7.1






More information about the sheepdog mailing list