[sheepdog] [PATCH v2] new feature of changing the replica number of existing cluster/vdi

Ruoyu liangry at ucweb.com
Mon May 19 09:34:59 CEST 2014


1. To set the replica number of cluster:
    dog cluster copies <num>

2. To set the replica number of a standalone vdi that has neither
   a parent nor children:
    dog vdi copies <vdiname> <num>

3. To set the replica number of a shared vdi that has a parent or children,
   first run the dog vdi clone command with the -R (--root) option:
    dog vdi clone -s <snapshot> -R <src vdi> <dst vdi>
   It deep-copies the source vdi into a brand new standalone vdi,
   so that dog vdi copies can then be run to change the destination
   vdi's replica number.

Signed-off-by: Ruoyu <liangry at ucweb.com>
---
 dog/cluster.c            | 73 ++++++++++++++++++++++++++++++++++++
 dog/vdi.c                | 96 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/internal_proto.h |  2 +
 sheep/ops.c              | 43 ++++++++++++++++++++++
 4 files changed, 213 insertions(+), 1 deletion(-)

diff --git a/dog/cluster.c b/dog/cluster.c
index 4af1e7c..7235c5f 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -545,6 +545,77 @@ static int cluster_check(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+#define SET_COPIES_PRINT				\
+	"    __\n"				\
+	"   ()'`;\n"				\
+	"   /\\|`  Caution! Changing the # of replica will affect\n"	\
+	"  /  |   all the VDIs to be created later.\n" \
+	"(/_)_|_  Are you sure you want to continue? [yes/no]: "
+
+/*
+ * "dog cluster copies <num>": validate <num> against the cluster's current
+ * settings, confirm with the user, then send SD_OP_SET_CLUSTER_COPIES.
+ */
+static int cluster_copies(int argc, char **argv)
+{
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	struct epoch_log *elogs;
+	int buf_len, ret, status = EXIT_FAILURE;
+
+	cluster_cmd_data.copies = parse_copy(argv[optind],
+				&cluster_cmd_data.copy_policy);
+	if (cluster_cmd_data.copy_policy != 0) {
+		sd_err("changing redundancy level to erasure code is not supported yet.");
+		return EXIT_USAGE;
+	}
+	if (cluster_cmd_data.copies <= 0) {
+		sd_err("invalid redundancy level.");
+		return EXIT_USAGE;
+	}
+
+	buf_len = sd_epoch * sizeof(struct epoch_log);
+	elogs = xmalloc(buf_len);
+	sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
+	hdr.data_length = buf_len;
+	ret = dog_exec_req(&sd_nid, &hdr, elogs);
+	if (ret < 0)
+		goto out;
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		sd_err("Response's result: %s", sd_strerror(rsp->result));
+		goto out;
+	}
+	if (elogs->copy_policy) {
+		sd_err("the cluster's copy policy is erasure code, "
+			   "changing it is not supported yet.");
+		goto out;
+	}
+	if (elogs->nr_copies == cluster_cmd_data.copies) {
+		sd_err("the cluster's redundancy level is already set to %d.",
+				cluster_cmd_data.copies);
+		goto out;
+	}
+
+	confirm(SET_COPIES_PRINT);
+
+	sd_init_req(&hdr, SD_OP_SET_CLUSTER_COPIES);
+	hdr.cluster.copies = cluster_cmd_data.copies;
+	hdr.cluster.copy_policy = cluster_cmd_data.copy_policy;
+	ret = send_light_req(&sd_nid, &hdr);
+	if (ret != 0) {
+		sd_err("set the cluster's redundancy level failure.");
+		goto out;
+	}
+
+	sd_info("the cluster's redundancy level is set to %d, the old one was %d.",
+			cluster_cmd_data.copies, elogs->nr_copies);
+	status = EXIT_SUCCESS;
+out:
+	free(elogs);
+	return status;
+}
+
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprhs", "show cluster information",
 	 NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},
@@ -563,6 +634,8 @@ static struct subcommand cluster_cmd[] = {
 	 cluster_reweight, cluster_options},
 	{"check", NULL, "aph", "check and repair cluster", NULL,
 	 CMD_NEED_NODELIST, cluster_check, cluster_options},
+	{"copies", "<num>", "aph", "set the cluster's redundancy level", NULL,
+	 CMD_NEED_ARG|CMD_NEED_NODELIST, cluster_copies, cluster_options},
 	{NULL,},
 };
 
diff --git a/dog/vdi.c b/dog/vdi.c
index 4d7fd54..817c92c 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -23,6 +23,8 @@
 
 static struct sd_option vdi_options[] = {
 	{'P', "prealloc", false, "preallocate all the data objects"},
+	{'R', "root", false, "clone a root vdi whose parent id is 0 and\n"
+	 "                          prealloc auto enabled"},
 	{'i', "index", true, "specify the index of data objects"},
 	{'s', "snapshot", true, "specify a snapshot id or tag name"},
 	{'x', "exclusive", false, "write in an exclusive mode"},
@@ -51,6 +53,7 @@ static struct vdi_cmd_data {
 	uint8_t copy_policy;
 	uint8_t store_policy;
 	uint64_t oid;
+	bool root;
 } vdi_cmd_data = { ~0, };
 
 struct get_vdi_info {
@@ -566,6 +569,9 @@ static int vdi_clone(int argc, char **argv)
 	if (ret != EXIT_SUCCESS)
 		goto out;
 
+	if (vdi_cmd_data.root == true)
+		base_vid = 0;
+
 	ret = do_vdi_create(dst_vdi, inode->vdi_size, base_vid, &new_vid, false,
 			    inode->nr_copies, inode->copy_policy,
 			    inode->store_policy);
@@ -2346,6 +2352,87 @@ static int vdi_cache(int argc, char **argv)
 	return do_generic_subcommand(vdi_cache_cmd, argc, argv);
 }
 
+#define SET_COPIES_PRINT				\
+	"    __\n"				\
+	"   ()'`;\n"				\
+	"   /\\|`  Caution! Changing the # of replica will affect\n"	\
+	"  /  |   the specified VDI and trigger recovery.\n" \
+	"(/_)_|_  Are you sure you want to continue? [yes/no]: "
+
+/*
+ * "dog vdi copies <vdiname> <num>": rewrite a standalone, replicated VDI's
+ * inode header with the new copy count, then send SD_OP_SET_VDI_COPIES.
+ */
+static int vdi_copies(int argc, char **argv)
+{
+	int ret, old_nr_copies;
+	uint32_t vid, no_child[MAX_CHILDREN] = { 0 };
+	const char *vdiname = argv[optind++];
+	char buf[SD_INODE_HEADER_SIZE];
+	struct sd_inode *inode = (struct sd_inode *)buf;
+	struct sd_req hdr;
+
+	/* argv[optind] is NULL when <num> was omitted; never parse that. */
+	if (!argv[optind]) {
+		sd_err("please specify the redundancy level.");
+		return EXIT_USAGE;
+	}
+	vdi_cmd_data.nr_copies = parse_copy(argv[optind],
+				&vdi_cmd_data.copy_policy);
+	if (vdi_cmd_data.copy_policy != 0) {
+		sd_err("changing redundancy level to erasure code is not supported yet.");
+		return EXIT_USAGE;
+	}
+	if (vdi_cmd_data.nr_copies <= 0) {
+		sd_err("invalid redundancy level.");
+		return EXIT_USAGE;
+	}
+
+	ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE);
+	if (ret != EXIT_SUCCESS) {
+		sd_err("read %s's vdi object failure.", vdiname);
+		return EXIT_FAILURE;
+	}
+	if (inode->copy_policy) {
+		sd_err("%s's copy policy is erasure code, "
+			   "changing it is not supported yet.", vdiname);
+		return EXIT_FAILURE;
+	}
+	old_nr_copies = inode->nr_copies;
+	if (old_nr_copies == vdi_cmd_data.nr_copies) {
+		sd_err("%s's redundancy level is already set to %d.",
+				vdiname, old_nr_copies);
+		return EXIT_FAILURE;
+	}
+	if (inode->parent_vdi_id != 0 ||
+	    memcmp(inode->child_vdi_id, no_child, sizeof(no_child)) != 0) {
+		sd_err("only standalone vdi supports changing redundancy level.");
+		sd_err("please clone it with -R option first.");
+		return EXIT_FAILURE;
+	}
+
+	confirm(SET_COPIES_PRINT);
+	inode->nr_copies = vdi_cmd_data.nr_copies;
+	ret = dog_write_object(vid_to_vdi_oid(vid), 0, inode,
+			SD_INODE_HEADER_SIZE, 0, 0, old_nr_copies,
+			inode->copy_policy, false, true);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("overwrite the vdi object's header of %s failure "
+			   "while setting its redundancy level.", vdiname);
+		return EXIT_FAILURE;
+	}
+	sd_init_req(&hdr, SD_OP_SET_VDI_COPIES);
+	hdr.vdi_state.new_vid = vid;
+	hdr.vdi_state.copies = vdi_cmd_data.nr_copies;
+	if (send_light_req(&sd_nid, &hdr) != 0) {
+		sd_err("set %s's redundancy level failure.", vdiname);
+		return EXIT_FAILURE;
+	}
+	sd_info("%s's redundancy level is set to %d, the old one was %d.",
+			vdiname, vdi_cmd_data.nr_copies, old_nr_copies);
+	return EXIT_SUCCESS;
+}
+
 static struct subcommand vdi_cmd[] = {
 	{"check", "<vdiname>", "saph", "check and repair image's consistency",
 	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
@@ -2356,7 +2443,7 @@ static struct subcommand vdi_cmd[] = {
 	{"snapshot", "<vdiname>", "saphrv", "create a snapshot",
 	 NULL, CMD_NEED_ARG,
 	 vdi_snapshot, vdi_options},
-	{"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image",
+	{"clone", "<src vdi> <dst vdi>", "sPRaphrv", "clone an image",
 	 NULL, CMD_NEED_ARG,
 	 vdi_clone, vdi_options},
 	{"delete", "<vdiname>", "saph", "delete an image",
@@ -2402,6 +2489,9 @@ static struct subcommand vdi_cmd[] = {
 	{"cache", "<vdiname>", "saph", "Run 'dog vdi cache' for more information",
 	 vdi_cache_cmd, CMD_NEED_ARG,
 	 vdi_cache, vdi_options},
+	{"copies", "<vdiname> <num>", "aph", "set the vdi's redundancy level",
+	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
+	 vdi_copies, vdi_options},
 	{NULL,},
 };
 
@@ -2413,6 +2503,10 @@ static int vdi_parser(int ch, const char *opt)
 	case 'P':
 		vdi_cmd_data.prealloc = true;
 		break;
+	case 'R':
+		vdi_cmd_data.root = true;
+		vdi_cmd_data.prealloc = true;
+		break;
 	case 'i':
 		vdi_cmd_data.index = strtol(opt, &p, 10);
 		if (opt == p) {
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 0eb7227..4e95e55 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -101,6 +101,8 @@
 #define SD_OP_NFS_DELETE	0xBC
 #define SD_OP_EXIST	0xBD
 #define SD_OP_CLUSTER_INFO	0xBE
+#define SD_OP_SET_CLUSTER_COPIES	0xC0
+#define SD_OP_SET_VDI_COPIES	0xC1
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index b9550f0..523dfbc 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -714,6 +714,35 @@ static int cluster_recovery_completion(const struct sd_req *req,
 	return SD_RES_SUCCESS;
 }
 
+/* Reject a non-zero copy policy or zero copies, else update sys->cinfo. */
+static int cluster_set_cluster_copies(const struct sd_req *req,
+			struct sd_rsp *rsp, void *data)
+{
+	if (req->cluster.copy_policy != 0 || req->cluster.copies == 0)
+		return SD_RES_INVALID_PARMS;
+	sys->cinfo.nr_copies = req->cluster.copies;
+	return set_cluster_config(&sys->cinfo);
+}
+
+/*
+ * Cluster op: pass a VDI's new copy count to add_vdi_state(), then recover.
+ * NOTE(review): dog fills req->vdi_state; should req->cluster be read here?
+ */
+static int cluster_set_vdi_copies(const struct sd_req *req,
+			struct sd_rsp *rsp, void *data)
+{
+	struct vnode_info *vinfo;
+
+	if (req->cluster.copy_policy != 0)
+		return SD_RES_INVALID_PARMS;
+
+	add_vdi_state(req->vdi_state.new_vid, req->vdi_state.copies, false, 0);
+	vinfo = get_vnode_info();
+	start_recovery(vinfo, vinfo, false);
+	put_vnode_info(vinfo);
+	return SD_RES_SUCCESS;
+}
+
 static bool node_size_varied(void)
 {
 	uint64_t new, used, old = sys->this_node.space;
@@ -1179,6 +1208,20 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = cluster_disable_recover,
 	},
 
+	[SD_OP_SET_CLUSTER_COPIES] = {
+		.name = "SET_CLUSTER_COPIES",
+		.type = SD_OP_TYPE_CLUSTER,
+		.is_admin_op = true,
+		.process_main = cluster_set_cluster_copies,
+	},
+
+	[SD_OP_SET_VDI_COPIES] = {
+		.name = "SET_VDI_COPIES",
+		.type = SD_OP_TYPE_CLUSTER,
+		.is_admin_op = true,
+		.process_main = cluster_set_vdi_copies,
+	},
+
 	/* local operations */
 	[SD_OP_RELEASE_VDI] = {
 		.name = "RELEASE_VDI",
-- 
1.8.3.2





More information about the sheepdog mailing list