[sheepdog] [PATCH v3] new feature of changing the replica number of existing cluster/vdi
Ruoyu
liangry at ucweb.com
Wed May 21 09:14:41 CEST 2014
1. To set the replica number of cluster:
dog alter cluster-copy -c <copies>
2. To set the replica number of a standalone vdi that has neither
a parent nor children:
dog alter vdi-copy -c <copies> <vdiname>
3. To set the replica number of a shared vdi that has a parent or children,
first run the dog vdi clone command with the -R (--root) option:
dog vdi clone -s <snapshot> -R <src vdi> <dst vdi>
It deep-copies the source vdi into a brand-new standalone vdi,
so that we can later run dog alter vdi-copy to change the destination
vdi's replica number.
Signed-off-by: Ruoyu <liangry at ucweb.com>
---
dog/Makefile.am | 2 +-
dog/alter.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++
dog/dog.c | 1 +
dog/dog.h | 4 +
dog/vdi.c | 14 ++-
include/internal_proto.h | 2 +
sheep/ops.c | 43 +++++++++
7 files changed, 289 insertions(+), 3 deletions(-)
create mode 100644 dog/alter.c
diff --git a/dog/Makefile.am b/dog/Makefile.am
index a7ead61..18cb114 100644
--- a/dog/Makefile.am
+++ b/dog/Makefile.am
@@ -25,7 +25,7 @@ sbin_PROGRAMS = dog
dog_SOURCES = farm/object_tree.c farm/sha1_file.c farm/snap.c \
farm/trunk.c farm/farm.c farm/slice.c \
- dog.c common.c treeview.c vdi.c node.c cluster.c
+ dog.c common.c treeview.c vdi.c node.c cluster.c alter.c
if BUILD_TRACE
dog_SOURCES += trace.c
diff --git a/dog/alter.c b/dog/alter.c
new file mode 100644
index 0000000..1ca9a18
--- /dev/null
+++ b/dog/alter.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2011 Nippon Telegraph and Telephone Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <time.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/time.h>
+
+#include "dog.h"
+
+static struct sd_option alter_options[] = { /* option table shared by both "alter" subcommands */
+ {'c', "copies", true, "specify the data redundancy level"}, /* its value is handled by alter_parser() */
+ { 0, NULL, false, NULL }, /* presumably the end-of-table marker */
+};
+
+static struct alter_cmd_data { /* command-line values filled in by alter_parser() */
+ uint8_t copies; /* requested replica number; 0 means -c was not given */
+ uint8_t copy_policy; /* written by parse_copy(); non-zero is rejected as erasure code */
+} alter_cmd_data;
+
+#define ALTER_CLUSTER_COPY_PRINT /* prompt handed to confirm() by alter_cluster_copy() */ \
+ " __\n" \
+ " ()'`;\n" \
+ " /\\|` Caution! Changing the # of replica will affect\n" \
+ " / | all the VDIs to be created later.\n" \
+ "(/_)_|_ Are you sure you want to continue? [yes/no]: "
+
+/*
+ * "dog alter cluster-copy": change the cluster-wide replica number.
+ *
+ * The wanted value is taken from alter_cmd_data; SD_DEFAULT_COPIES is used
+ * when -c was not given.  The current setting is fetched with
+ * SD_OP_STAT_CLUSTER, and once confirm() has returned the new value is sent
+ * with SD_OP_ALTER_CLUSTER_COPY.
+ *
+ * Returns EXIT_USAGE when an erasure-code policy was requested,
+ * EXIT_SUCCESS when the alter request succeeded, EXIT_FAILURE otherwise
+ * (including the case where the value is already the requested one).
+ */
+static int alter_cluster_copy(int argc, char **argv)
+{
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	struct epoch_log *logs;
+	int log_length, ret;
+	int status = EXIT_FAILURE;
+
+	if (alter_cmd_data.copy_policy != 0) {
+		sd_err("changing redundancy level to erasure code is not supported yet.");
+		return EXIT_USAGE;
+	}
+
+	if (alter_cmd_data.copies == 0) {
+		alter_cmd_data.copies = SD_DEFAULT_COPIES;
+		printf("new cluster redundancy level is not specified, "
+		       "use %d as default.\n", SD_DEFAULT_COPIES);
+	}
+
+	/* More replicas than nodes is allowed, but only after asking. */
+	if (alter_cmd_data.copies > sd_nodes_nr) {
+		char prompt[1024];
+
+		snprintf(prompt, sizeof(prompt),
+			 "Number of copies (%d) is larger "
+			 "than number of nodes (%d).\n"
+			 "Are you sure you want to continue? [yes/no]: ",
+			 alter_cmd_data.copies, sd_nodes_nr);
+		confirm(prompt);
+	}
+
+	/* Fetch the epoch logs; only the first entry is inspected below. */
+	log_length = sd_epoch * sizeof(struct epoch_log);
+	logs = xmalloc(log_length);
+	sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
+	hdr.data_length = log_length;
+	ret = dog_exec_req(&sd_nid, &hdr, logs);
+	if (ret < 0)
+		goto out;
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		sd_err("Response's result: %s", sd_strerror(rsp->result));
+		goto out;
+	}
+
+	if (logs->copy_policy) {
+		sd_err("the cluster's copy policy is erasure code, "
+		       "changing it is not supported yet.");
+		goto out;
+	}
+
+	if (logs->nr_copies == alter_cmd_data.copies) {
+		sd_err("the cluster's redundancy level is already set to %d, "
+		       "nothing changed.", alter_cmd_data.copies);
+		goto out;
+	}
+
+	confirm(ALTER_CLUSTER_COPY_PRINT);
+
+	sd_init_req(&hdr, SD_OP_ALTER_CLUSTER_COPY);
+	hdr.cluster.copies = alter_cmd_data.copies;
+	hdr.cluster.copy_policy = alter_cmd_data.copy_policy;
+	ret = send_light_req(&sd_nid, &hdr);
+	if (ret != 0) {
+		sd_err("set the cluster's redundancy level failure.");
+		goto out;
+	}
+
+	sd_info("the cluster's redundancy level is set to %d, the old one was %d.",
+		alter_cmd_data.copies, logs->nr_copies);
+	status = EXIT_SUCCESS;
+
+out:
+	/* Single exit for every path that owns the log buffer. */
+	free(logs);
+	return status;
+}
+
+#define ALTER_VDI_COPY_PRINT /* prompt handed to confirm() by alter_vdi_copy() */ \
+ " __\n" \
+ " ()'`;\n" \
+ " /\\|` Caution! Changing the # of replica will affect\n" \
+ " / | the specified VDI and trigger recovery.\n" \
+ "(/_)_|_ Are you sure you want to continue? [yes/no]: "
+
+/*
+ * "dog alter vdi-copy <vdiname>": change the replica number of one vdi.
+ *
+ * The vdi name is taken from argv[optind].  The vdi's inode header is read,
+ * and the request is refused when the vdi uses a copy policy, already has
+ * the requested replica number, or has a parent or any child.  Otherwise the
+ * header is written back with the new replica number (old_nr_copies is what
+ * gets passed to dog_write_object()) and SD_OP_ALTER_VDI_COPY is sent.
+ *
+ * Returns EXIT_USAGE when an erasure-code policy was requested,
+ * EXIT_SUCCESS when the alter request succeeded, EXIT_FAILURE otherwise.
+ */
+static int alter_vdi_copy(int argc, char **argv)
+{
+	const char *vdiname = argv[optind++];
+	char buf[SD_INODE_HEADER_SIZE];
+	struct sd_inode *inode = (struct sd_inode *)buf;
+	/* All-zero reference used to detect "no children at all". */
+	uint32_t no_children[MAX_CHILDREN] = {0};
+	uint32_t vid;
+	struct sd_req hdr;
+	int ret, old_nr_copies;
+
+	if (alter_cmd_data.copy_policy != 0) {
+		sd_err("changing redundancy level to erasure code is not supported yet.");
+		return EXIT_USAGE;
+	}
+
+	if (alter_cmd_data.copies == 0) {
+		alter_cmd_data.copies = SD_DEFAULT_COPIES;
+		printf("new vdi redundancy level is not specified, "
+		       "use %d as default.\n", SD_DEFAULT_COPIES);
+	}
+
+	/* More replicas than nodes is allowed, but only after asking. */
+	if (alter_cmd_data.copies > sd_nodes_nr) {
+		char prompt[1024];
+
+		snprintf(prompt, sizeof(prompt),
+			 "Number of copies (%d) is larger "
+			 "than number of nodes (%d).\n"
+			 "Are you sure you want to continue? [yes/no]: ",
+			 alter_cmd_data.copies, sd_nodes_nr);
+		confirm(prompt);
+	}
+
+	ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE);
+	if (ret != EXIT_SUCCESS) {
+		sd_err("read %s's vdi object failure.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	if (inode->copy_policy) {
+		sd_err("%s's copy policy is erasure code, "
+		       "changing it is not supported yet.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	old_nr_copies = inode->nr_copies;
+	if (old_nr_copies == alter_cmd_data.copies) {
+		sd_err("%s's redundancy level is already set to %d, "
+		       "nothing changed.", vdiname, old_nr_copies);
+		return EXIT_FAILURE;
+	}
+
+	/* Only a vdi with neither a parent nor any child may be altered. */
+	if (inode->parent_vdi_id != 0 ||
+	    memcmp(inode->child_vdi_id, no_children,
+		   sizeof(no_children)) != 0) {
+		sd_err("only standalone vdi supports changing redundancy level.");
+		sd_err("please clone it with -R option first.");
+		return EXIT_FAILURE;
+	}
+
+	confirm(ALTER_VDI_COPY_PRINT);
+
+	/* Store the new replica number in the inode header first ... */
+	inode->nr_copies = alter_cmd_data.copies;
+	ret = dog_write_object(vid_to_vdi_oid(vid), 0, inode,
+			       SD_INODE_HEADER_SIZE, 0, 0, old_nr_copies,
+			       inode->copy_policy, false, true);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("overwrite the vdi object's header of %s failure "
+		       "while setting its redundancy level.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	/* ... then send the alter request for this vid. */
+	sd_init_req(&hdr, SD_OP_ALTER_VDI_COPY);
+	hdr.vdi_state.new_vid = vid;
+	hdr.vdi_state.copies = alter_cmd_data.copies;
+	hdr.vdi_state.copy_policy = alter_cmd_data.copy_policy;
+
+	ret = send_light_req(&sd_nid, &hdr);
+	if (ret != 0) {
+		sd_err("set %s's redundancy level failure.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	sd_info("%s's redundancy level is set to %d, the old one was %d.",
+		vdiname, alter_cmd_data.copies, old_nr_copies);
+	return EXIT_SUCCESS;
+}
+
+static struct subcommand alter_cmd[] = { /* subcommands of "dog alter" */
+ {"cluster-copy", NULL, "caph", "set the cluster's redundancy level",
+ NULL, CMD_NEED_NODELIST, alter_cluster_copy, alter_options},
+ {"vdi-copy", "<vdiname>", "caph", "set the vdi's redundancy level", /* takes the vdi name as its argument */
+ NULL, CMD_NEED_ARG|CMD_NEED_NODELIST, alter_vdi_copy, alter_options},
+ {NULL,}, /* presumably the end-of-table marker */
+};
+
+/*
+ * Option callback of the "alter" command.
+ *
+ * Only 'c' is handled: the result of parse_copy() becomes the requested
+ * replica number, and parse_copy() is given alter_cmd_data.copy_policy to
+ * fill in.  A zero replica number is reported as invalid and ends the
+ * program with EXIT_FAILURE.  Any other option is ignored.
+ *
+ * Always returns 0.
+ */
+static int alter_parser(int ch, const char *opt)
+{
+	if (ch != 'c')
+		return 0;
+
+	alter_cmd_data.copies = parse_copy(opt, &alter_cmd_data.copy_policy);
+	if (alter_cmd_data.copies == 0) {
+		sd_err("Invalid redundancy level %s.", opt);
+		exit(EXIT_FAILURE);
+	}
+
+	return 0;
+}
+
+struct command alter_command = { /* listed in init_commands() in dog.c */
+ "alter",
+ alter_cmd,
+ alter_parser
+};
diff --git a/dog/dog.c b/dog/dog.c
index 7942b34..e97ef62 100644
--- a/dog/dog.c
+++ b/dog/dog.c
@@ -157,6 +157,7 @@ static void init_commands(const struct command **commands)
vdi_command,
node_command,
cluster_command,
+ alter_command,
#ifdef HAVE_TRACE
trace_command,
#endif
diff --git a/dog/dog.h b/dog/dog.h
index 59d5a1c..9b53b3f 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -99,9 +99,13 @@ int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len, uint64_t offset,
int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len,
uint64_t offset);
+int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
+ uint32_t *pvid, struct sd_inode *inode, size_t size);
+
extern struct command vdi_command;
extern struct command node_command;
extern struct command cluster_command;
+extern struct command alter_command;
#ifdef HAVE_TRACE
extern struct command trace_command;
diff --git a/dog/vdi.c b/dog/vdi.c
index 4d7fd54..f14b11e 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -23,6 +23,8 @@
static struct sd_option vdi_options[] = {
{'P', "prealloc", false, "preallocate all the data objects"},
+ {'R', "root", false, "clone a root vdi whose parent id is 0 and\n"
+ " prealloc auto enabled"},
{'i', "index", true, "specify the index of data objects"},
{'s', "snapshot", true, "specify a snapshot id or tag name"},
{'x', "exclusive", false, "write in an exclusive mode"},
@@ -51,6 +53,7 @@ static struct vdi_cmd_data {
uint8_t copy_policy;
uint8_t store_policy;
uint64_t oid;
+ bool root;
} vdi_cmd_data = { ~0, };
struct get_vdi_info {
@@ -336,7 +339,7 @@ static int find_vdi_name(const char *vdiname, uint32_t snapid, const char *tag,
return 0;
}
-static int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
+int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
uint32_t *pvid, struct sd_inode *inode,
size_t size)
{
@@ -566,6 +569,9 @@ static int vdi_clone(int argc, char **argv)
if (ret != EXIT_SUCCESS)
goto out;
+ if (vdi_cmd_data.root == true)
+ base_vid = 0;
+
ret = do_vdi_create(dst_vdi, inode->vdi_size, base_vid, &new_vid, false,
inode->nr_copies, inode->copy_policy,
inode->store_policy);
@@ -2356,7 +2362,7 @@ static struct subcommand vdi_cmd[] = {
{"snapshot", "<vdiname>", "saphrv", "create a snapshot",
NULL, CMD_NEED_ARG,
vdi_snapshot, vdi_options},
- {"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image",
+ {"clone", "<src vdi> <dst vdi>", "sPRaphrv", "clone an image",
NULL, CMD_NEED_ARG,
vdi_clone, vdi_options},
{"delete", "<vdiname>", "saph", "delete an image",
@@ -2413,6 +2419,10 @@ static int vdi_parser(int ch, const char *opt)
case 'P':
vdi_cmd_data.prealloc = true;
break;
+ case 'R':
+ vdi_cmd_data.root = true;
+ vdi_cmd_data.prealloc = true;
+ break;
case 'i':
vdi_cmd_data.index = strtol(opt, &p, 10);
if (opt == p) {
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 0eb7227..ba9cd86 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -101,6 +101,8 @@
#define SD_OP_NFS_DELETE 0xBC
#define SD_OP_EXIST 0xBD
#define SD_OP_CLUSTER_INFO 0xBE
+#define SD_OP_ALTER_CLUSTER_COPY 0xC0
+#define SD_OP_ALTER_VDI_COPY 0xC1
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index b9550f0..ce1b49c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -714,6 +714,35 @@ static int cluster_recovery_completion(const struct sd_req *req,
return SD_RES_SUCCESS;
}
+/*
+ * Handler for SD_OP_ALTER_CLUSTER_COPY: store the requested replica number
+ * in sys->cinfo and return whatever set_cluster_config() reports.  A request
+ * carrying a non-zero copy policy is refused with SD_RES_INVALID_PARMS.
+ */
+static int cluster_alter_cluster_copy(const struct sd_req *req,
+				      struct sd_rsp *rsp, void *data)
+{
+	if (req->cluster.copy_policy == 0) {
+		sys->cinfo.nr_copies = req->cluster.copies;
+		return set_cluster_config(&sys->cinfo);
+	}
+
+	return SD_RES_INVALID_PARMS;
+}
+
+/* SD_OP_ALTER_VDI_COPY: record a vdi's new replica number, then start recovery. */
+static int cluster_alter_vdi_copy(const struct sd_req *req,
+				  struct sd_rsp *rsp, void *data)
+{
+	uint32_t vid = req->vdi_state.new_vid;
+	int nr_copies = req->vdi_state.copies;
+	struct vnode_info *vinfo;
+
+	/* dog fills hdr.vdi_state for this op, so the policy is read from there */
+	if (req->vdi_state.copy_policy != 0)
+		return SD_RES_INVALID_PARMS;
+
+	add_vdi_state(vid, nr_copies, false, 0);
+	vinfo = get_vnode_info();
+	start_recovery(vinfo, vinfo, false);
+	put_vnode_info(vinfo);
+	return SD_RES_SUCCESS;
+}
+
static bool node_size_varied(void)
{
uint64_t new, used, old = sys->this_node.space;
@@ -1179,6 +1208,20 @@ static struct sd_op_template sd_ops[] = {
.process_main = cluster_disable_recover,
},
+ [SD_OP_ALTER_CLUSTER_COPY] = {
+ .name = "ALTER_CLUSTER_COPY",
+ .type = SD_OP_TYPE_CLUSTER,
+ .is_admin_op = true,
+ .process_main = cluster_alter_cluster_copy,
+ },
+
+ [SD_OP_ALTER_VDI_COPY] = {
+ .name = "ALTER_VDI_COPY",
+ .type = SD_OP_TYPE_CLUSTER,
+ .is_admin_op = true,
+ .process_main = cluster_alter_vdi_copy,
+ },
+
/* local operations */
[SD_OP_RELEASE_VDI] = {
.name = "RELEASE_VDI",
--
1.8.3.2
More information about the sheepdog
mailing list