[sheepdog] [PATCH v3] change the redundancy level of existing cluster and vdi
Ruoyu
liangry at ucweb.com
Tue May 27 05:05:27 CEST 2014
This is a new feature that allows the user to change the redundancy level
of an already formatted cluster and of already created vdis. Only simple
replication is supported for now.
1. To set the replica number of a formatted cluster:
dog alter cluster-copy -c <copies>
2. To set the replica number of a standalone vdi that has neither
parent nor children:
dog alter vdi-copy -c <copies> <vdiname>
3. To set the replica number of a shared vdi that has a parent or children,
first run the dog vdi clone command with the -n (--no-share) option, to
avoid shared data inconsistency:
dog vdi clone -s <snapshot> -n <src vdi> <dst vdi>
It will deep copy the source vdi into a brand new standalone vdi,
so that dog alter vdi-copy can be run later to change the destination
vdi's replica number.
Signed-off-by: Ruoyu <liangry at ucweb.com>
---
dog/Makefile.am | 3 +-
dog/alter.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++
dog/dog.c | 1 +
dog/dog.h | 4 +
dog/vdi.c | 14 ++-
include/internal_proto.h | 2 +
sheep/ops.c | 43 +++++++++
7 files changed, 295 insertions(+), 3 deletions(-)
create mode 100644 dog/alter.c
diff --git a/dog/Makefile.am b/dog/Makefile.am
index a7ead61..bd86452 100644
--- a/dog/Makefile.am
+++ b/dog/Makefile.am
@@ -25,7 +25,8 @@ sbin_PROGRAMS = dog
dog_SOURCES = farm/object_tree.c farm/sha1_file.c farm/snap.c \
farm/trunk.c farm/farm.c farm/slice.c \
- dog.c common.c treeview.c vdi.c node.c cluster.c
+ dog.c common.c treeview.c vdi.c node.c cluster.c \
+ alter.c
if BUILD_TRACE
dog_SOURCES += trace.c
diff --git a/dog/alter.c b/dog/alter.c
new file mode 100644
index 0000000..c2126bf
--- /dev/null
+++ b/dog/alter.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2011 Nippon Telegraph and Telephone Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <time.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/time.h>
+
+#include "dog.h"
+
+/*
+ * Command-line options for "dog alter"; both subcommands in alter_cmd[]
+ * point at this table.  The last, all-zero/NULL entry presumably marks
+ * the end of the table (the consumer is not visible here).
+ */
+static struct sd_option alter_options[] = {
+ {'c', "copies", true, "specify the data redundancy level"},
+ { 0, NULL, false, NULL },
+};
+
+/* Values taken from the command line; filled in by alter_parser(). */
+static struct alter_cmd_data {
+ uint8_t copies; /* requested level; stays 0 when -c was not given */
+ uint8_t copy_policy; /* written by parse_copy(); non-zero is refused */
+} alter_cmd_data;
+
+/*
+ * Prompt passed to confirm() right before alter_cluster_copy() sends the
+ * request that changes the cluster's redundancy level.
+ */
+#define ALTER_CLUSTER_COPY_PRINT \
+ " __\n" \
+ " ()'`;\n" \
+ " /\\|` Caution! Changing cluster's redundancy level will affect\n" \
+ " / | all the VDIs to be created later.\n" \
+ "(/_)_|_ Are you sure you want to continue? [yes/no]: "
+
+/*
+ * dog alter cluster-copy -c <copies>
+ *
+ * Change the redundancy level recorded for the cluster.  Only plain
+ * replication is handled: a request that carries a copy policy, or a
+ * cluster whose first epoch log entry reports one, is refused.
+ *
+ * When -c is missing, SD_DEFAULT_COPIES is used.  The current level is
+ * read with SD_OP_STAT_CLUSTER and the new one is sent with
+ * SD_OP_ALTER_CLUSTER_COPY after the user confirmed the prompt.
+ *
+ * Returns EXIT_SUCCESS when the request succeeded, EXIT_USAGE when the
+ * requested level carries a copy policy, and EXIT_FAILURE in every other
+ * case (including "the level is already the requested one").
+ */
+static int alter_cluster_copy(int argc, char **argv)
+{
+	int ret, log_length, status = EXIT_FAILURE;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	struct epoch_log *logs;
+
+	if (alter_cmd_data.copy_policy != 0) {
+		/* this is the cluster command, so do not talk about a vdi */
+		sd_err("Changing redundancy level of erasure coded cluster "
+		       "is not supported yet.");
+		return EXIT_USAGE;
+	}
+	if (!alter_cmd_data.copies) {
+		alter_cmd_data.copies = SD_DEFAULT_COPIES;
+		printf("The cluster's redundancy level is not specified, "
+		       "use %d as default.\n", SD_DEFAULT_COPIES);
+	}
+
+	if (alter_cmd_data.copies > sd_nodes_nr) {
+		char info[1024];
+		snprintf(info, sizeof(info), "Number of copies (%d) is larger "
+			 "than number of nodes (%d).\n"
+			 "Are you sure you want to continue? [yes/no]: ",
+			 alter_cmd_data.copies, sd_nodes_nr);
+		confirm(info);
+	}
+
+	/* fetch the epoch logs; only the first entry is inspected below */
+	log_length = sd_epoch * sizeof(struct epoch_log);
+	logs = xmalloc(log_length);
+	sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
+	hdr.data_length = log_length;
+	ret = dog_exec_req(&sd_nid, &hdr, logs);
+	if (ret < 0)
+		goto out;
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		sd_err("Response's result: %s", sd_strerror(rsp->result));
+		goto out;
+	}
+	if (logs->copy_policy) {
+		sd_err("The cluster's copy policy is erasure code, "
+		       "changing it is not supported yet.");
+		goto out;
+	}
+	if (logs->nr_copies == alter_cmd_data.copies) {
+		sd_err("The cluster's redundancy level is already set to %d, "
+		       "nothing changed.", alter_cmd_data.copies);
+		goto out;
+	}
+
+	confirm(ALTER_CLUSTER_COPY_PRINT);
+
+	sd_init_req(&hdr, SD_OP_ALTER_CLUSTER_COPY);
+	hdr.cluster.copies = alter_cmd_data.copies;
+	hdr.cluster.copy_policy = alter_cmd_data.copy_policy;
+	ret = send_light_req(&sd_nid, &hdr);
+	if (ret != 0) {
+		sd_err("Changing the cluster's redundancy level failure.");
+		goto out;
+	}
+
+	sd_info("The cluster's redundancy level is set to %d, "
+		"the old one was %d.",
+		alter_cmd_data.copies, logs->nr_copies);
+	status = EXIT_SUCCESS;
+out:
+	/* single exit: the epoch log buffer is released on every path */
+	free(logs);
+	return status;
+}
+
+/*
+ * Prompt passed to confirm() right before alter_vdi_copy() rewrites the
+ * vdi's inode header with the new redundancy level.
+ */
+#define ALTER_VDI_COPY_PRINT \
+ " __\n" \
+ " ()'`;\n" \
+ " /\\|` Caution! Changing VDI's redundancy level will affect\n" \
+ " / | the VDI itself only and trigger recovery.\n" \
+ "(/_)_|_ Are you sure you want to continue? [yes/no]: "
+
+/*
+ * dog alter vdi-copy -c <copies> <vdiname>
+ *
+ * Change the redundancy level of one vdi.  Only plain replication is
+ * handled, and only for a vdi that has neither a parent nor children.
+ *
+ * The inode header is rewritten with the new level first, then the
+ * cluster is notified with SD_OP_ALTER_VDI_COPY.  If the notification
+ * fails, the header is written back with the old level; otherwise a
+ * retry would stop at the "already set" check below although the change
+ * never took effect.
+ *
+ * Returns EXIT_SUCCESS when both steps succeeded, EXIT_USAGE when the
+ * requested level carries a copy policy, and EXIT_FAILURE otherwise.
+ */
+static int alter_vdi_copy(int argc, char **argv)
+{
+	int ret, old_nr_copies;
+	uint32_t vid, child_vdi_id[MAX_CHILDREN];
+	const char *vdiname = argv[optind++];
+	char buf[SD_INODE_HEADER_SIZE];
+	struct sd_inode *inode = (struct sd_inode *)buf;
+	struct sd_req hdr;
+
+	if (alter_cmd_data.copy_policy != 0) {
+		sd_err("Changing redundancy level of erasure coded vdi "
+		       "is not supported yet.");
+		return EXIT_USAGE;
+	}
+	if (!alter_cmd_data.copies) {
+		alter_cmd_data.copies = SD_DEFAULT_COPIES;
+		printf("The vdi's redundancy level is not specified, "
+		       "use %d as default.\n", SD_DEFAULT_COPIES);
+	}
+
+	if (alter_cmd_data.copies > sd_nodes_nr) {
+		char info[1024];
+		snprintf(info, sizeof(info), "Number of copies (%d) is larger "
+			 "than number of nodes (%d).\n"
+			 "Are you sure you want to continue? [yes/no]: ",
+			 alter_cmd_data.copies, sd_nodes_nr);
+		confirm(info);
+	}
+
+	ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE);
+	if (ret != EXIT_SUCCESS) {
+		sd_err("Reading %s's vdi object failure.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	if (inode->copy_policy) {
+		sd_err("%s's copy policy is erasure code, "
+		       "changing it is not supported yet.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	old_nr_copies = inode->nr_copies;
+	if (old_nr_copies == alter_cmd_data.copies) {
+		sd_err("%s's redundancy level is already set to %d, "
+		       "nothing changed.", vdiname, old_nr_copies);
+		return EXIT_FAILURE;
+	}
+
+	/* an all-zero child table plus no parent means a standalone vdi */
+	memset(child_vdi_id, 0, sizeof(child_vdi_id));
+	if (inode->parent_vdi_id != 0 ||
+	    memcmp(inode->child_vdi_id, child_vdi_id,
+		   sizeof(child_vdi_id)) != 0) {
+		sd_err("Only standalone vdi supports "
+		       "changing redundancy level.");
+		sd_err("Please clone %s with -n (--no-share) "
+		       "option first.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	confirm(ALTER_VDI_COPY_PRINT);
+
+	/* the header is still written with the old number of copies */
+	inode->nr_copies = alter_cmd_data.copies;
+	ret = dog_write_object(vid_to_vdi_oid(vid), 0, inode,
+			       SD_INODE_HEADER_SIZE, 0, 0, old_nr_copies,
+			       inode->copy_policy, false, true);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Overwrite the vdi object's header of %s failure "
+		       "while setting its redundancy level.", vdiname);
+		return EXIT_FAILURE;
+	}
+
+	sd_init_req(&hdr, SD_OP_ALTER_VDI_COPY);
+	hdr.vdi_state.new_vid = vid;
+	hdr.vdi_state.copies = alter_cmd_data.copies;
+	hdr.vdi_state.copy_policy = alter_cmd_data.copy_policy;
+
+	ret = send_light_req(&sd_nid, &hdr);
+	if (ret == 0) {
+		sd_info("%s's redundancy level is set to %d, the old one was %d.",
+			vdiname, alter_cmd_data.copies, old_nr_copies);
+		return EXIT_SUCCESS;
+	}
+	sd_err("Changing %s's redundancy level failure.", vdiname);
+
+	/* undo the header change so that the command can be run again */
+	inode->nr_copies = old_nr_copies;
+	ret = dog_write_object(vid_to_vdi_oid(vid), 0, inode,
+			       SD_INODE_HEADER_SIZE, 0, 0, old_nr_copies,
+			       inode->copy_policy, false, true);
+	if (ret != SD_RES_SUCCESS)
+		sd_err("Restoring the vdi object's header of %s failure, "
+		       "its redundancy level may be inconsistent.", vdiname);
+
+	return EXIT_FAILURE;
+}
+
+/*
+ * Subcommands of "dog alter".  Both use the option table alter_options;
+ * "vdi-copy" additionally sets CMD_NEED_ARG for its <vdiname> argument.
+ * The trailing {NULL,} entry presumably terminates the table.
+ */
+static struct subcommand alter_cmd[] = {
+ {"cluster-copy", NULL, "caph", "set the cluster's redundancy level",
+ NULL, CMD_NEED_NODELIST, alter_cluster_copy, alter_options},
+ {"vdi-copy", "<vdiname>", "caph", "set the vdi's redundancy level",
+ NULL, CMD_NEED_ARG|CMD_NEED_NODELIST, alter_vdi_copy, alter_options},
+ {NULL,},
+};
+
+/*
+ * Option callback of the "alter" command.
+ *
+ * For 'c' the argument is handed to parse_copy(), which yields the number
+ * of copies and fills in alter_cmd_data.copy_policy; a result of 0 is
+ * reported as an invalid redundancy level and terminates the program.
+ * Every other option character is ignored.  Always returns 0.
+ */
+static int alter_parser(int ch, const char *opt)
+{
+	if (ch != 'c')
+		return 0;
+
+	alter_cmd_data.copies = parse_copy(opt, &alter_cmd_data.copy_policy);
+	if (alter_cmd_data.copies == 0) {
+		sd_err("Invalid redundancy level %s.", opt);
+		exit(EXIT_FAILURE);
+	}
+
+	return 0;
+}
+
+/*
+ * The "alter" command: its subcommand table and its option callback.
+ * Declared extern in dog.h and listed in init_commands() in dog.c.
+ */
+struct command alter_command = {
+ "alter",
+ alter_cmd,
+ alter_parser
+};
diff --git a/dog/dog.c b/dog/dog.c
index 7942b34..e97ef62 100644
--- a/dog/dog.c
+++ b/dog/dog.c
@@ -157,6 +157,7 @@ static void init_commands(const struct command **commands)
vdi_command,
node_command,
cluster_command,
+ alter_command,
#ifdef HAVE_TRACE
trace_command,
#endif
diff --git a/dog/dog.h b/dog/dog.h
index 59d5a1c..9b53b3f 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -99,9 +99,13 @@ int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len, uint64_t offset,
int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len,
uint64_t offset);
+int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
+ uint32_t *pvid, struct sd_inode *inode, size_t size);
+
extern struct command vdi_command;
extern struct command node_command;
extern struct command cluster_command;
+extern struct command alter_command;
#ifdef HAVE_TRACE
extern struct command trace_command;
diff --git a/dog/vdi.c b/dog/vdi.c
index 4d7fd54..bdac72d 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -23,6 +23,8 @@
static struct sd_option vdi_options[] = {
{'P', "prealloc", false, "preallocate all the data objects"},
+ {'n', "no-share", false, "share nothing with its parent, "
+ "prealloc also enabled"},
{'i', "index", true, "specify the index of data objects"},
{'s', "snapshot", true, "specify a snapshot id or tag name"},
{'x', "exclusive", false, "write in an exclusive mode"},
@@ -51,6 +53,7 @@ static struct vdi_cmd_data {
uint8_t copy_policy;
uint8_t store_policy;
uint64_t oid;
+ bool no_share;
} vdi_cmd_data = { ~0, };
struct get_vdi_info {
@@ -336,7 +339,7 @@ static int find_vdi_name(const char *vdiname, uint32_t snapid, const char *tag,
return 0;
}
-static int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
+int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
uint32_t *pvid, struct sd_inode *inode,
size_t size)
{
@@ -566,6 +569,9 @@ static int vdi_clone(int argc, char **argv)
if (ret != EXIT_SUCCESS)
goto out;
+ if (vdi_cmd_data.no_share == true)
+ base_vid = 0;
+
ret = do_vdi_create(dst_vdi, inode->vdi_size, base_vid, &new_vid, false,
inode->nr_copies, inode->copy_policy,
inode->store_policy);
@@ -2356,7 +2362,7 @@ static struct subcommand vdi_cmd[] = {
{"snapshot", "<vdiname>", "saphrv", "create a snapshot",
NULL, CMD_NEED_ARG,
vdi_snapshot, vdi_options},
- {"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image",
+ {"clone", "<src vdi> <dst vdi>", "sPnaphrv", "clone an image",
NULL, CMD_NEED_ARG,
vdi_clone, vdi_options},
{"delete", "<vdiname>", "saph", "delete an image",
@@ -2413,6 +2419,10 @@ static int vdi_parser(int ch, const char *opt)
case 'P':
vdi_cmd_data.prealloc = true;
break;
+ case 'n':
+ vdi_cmd_data.no_share = true;
+ vdi_cmd_data.prealloc = true;
+ break;
case 'i':
vdi_cmd_data.index = strtol(opt, &p, 10);
if (opt == p) {
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 0eb7227..ba9cd86 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -101,6 +101,8 @@
#define SD_OP_NFS_DELETE 0xBC
#define SD_OP_EXIST 0xBD
#define SD_OP_CLUSTER_INFO 0xBE
+#define SD_OP_ALTER_CLUSTER_COPY 0xC0
+#define SD_OP_ALTER_VDI_COPY 0xC1
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index b9550f0..ce1b49c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -714,6 +714,35 @@ static int cluster_recovery_completion(const struct sd_req *req,
return SD_RES_SUCCESS;
}
+/*
+ * Handler of SD_OP_ALTER_CLUSTER_COPY.
+ *
+ * A request with a non-zero copy policy is rejected with
+ * SD_RES_INVALID_PARMS.  Otherwise the requested number of copies is
+ * stored in sys->cinfo and the result of set_cluster_config() is
+ * returned.
+ */
+static int cluster_alter_cluster_copy(const struct sd_req *req,
+			struct sd_rsp *rsp, void *data)
+{
+	if (req->cluster.copy_policy == 0) {
+		sys->cinfo.nr_copies = req->cluster.copies;
+		return set_cluster_config(&sys->cinfo);
+	}
+
+	return SD_RES_INVALID_PARMS;
+}
+
+/*
+ * Handler of SD_OP_ALTER_VDI_COPY.
+ *
+ * The sender (dog's alter_vdi_copy) fills in the vdi_state member of the
+ * request, so the copy policy has to be read from vdi_state as well, not
+ * from the cluster member.  A non-zero copy policy is rejected with
+ * SD_RES_INVALID_PARMS.
+ *
+ * Otherwise the vdi state of new_vid is updated with the requested number
+ * of copies and recovery is started with the current vnode info passed as
+ * both vnode arguments.  Returns SD_RES_SUCCESS in that case.
+ */
+static int cluster_alter_vdi_copy(const struct sd_req *req,
+			struct sd_rsp *rsp, void *data)
+{
+	uint32_t vid = req->vdi_state.new_vid;
+	int nr_copies = req->vdi_state.copies;
+	struct vnode_info *vinfo;
+
+	if (req->vdi_state.copy_policy != 0)
+		return SD_RES_INVALID_PARMS;
+
+	add_vdi_state(vid, nr_copies, false, 0);
+
+	vinfo = get_vnode_info();
+	start_recovery(vinfo, vinfo, false);
+	put_vnode_info(vinfo);
+
+	return SD_RES_SUCCESS;
+}
+
static bool node_size_varied(void)
{
uint64_t new, used, old = sys->this_node.space;
@@ -1179,6 +1208,20 @@ static struct sd_op_template sd_ops[] = {
.process_main = cluster_disable_recover,
},
+ [SD_OP_ALTER_CLUSTER_COPY] = {
+ .name = "ALTER_CLUSTER_COPY",
+ .type = SD_OP_TYPE_CLUSTER,
+ .is_admin_op = true,
+ .process_main = cluster_alter_cluster_copy,
+ },
+
+ [SD_OP_ALTER_VDI_COPY] = {
+ .name = "ALTER_VDI_COPY",
+ .type = SD_OP_TYPE_CLUSTER,
+ .is_admin_op = true,
+ .process_main = cluster_alter_vdi_copy,
+ },
+
/* local operations */
[SD_OP_RELEASE_VDI] = {
.name = "RELEASE_VDI",
--
1.8.3.2
More information about the sheepdog
mailing list