[sheepdog] [PATCH] new feature of changing the replica number of existing cluster/vdi

Ruoyu liangry at ucweb.com
Mon May 19 09:54:01 CEST 2014


On 2014年05月16日 23:00, Hitoshi Mitake wrote:
> At Mon, 12 May 2014 18:31:42 +0800,
> Ruoyu wrote:
>> 1. to set the replica number of cluster:
>>      dog cluster copies <num>
>>
>> 2. to set the replica number of vdi:
>>      dog vdi copies <vdiname> <num>
>>
>> Signed-off-by: Ruoyu <liangry at ucweb.com>
>> ---
>>   dog/cluster.c            | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   dog/vdi.c                | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   include/internal_proto.h |  2 ++
>>   sheep/ops.c              | 43 ++++++++++++++++++++++++++++
>>   4 files changed, 191 insertions(+)
> I think changing copy number of vdi is dangerous. Assume that we are
> changing copy number of VDI A from 3 to 2 which has a snapshot B.
> Your cluster_set_vdi_copies() starts recovery process but it cannot
> change a number of copies actually because some of A's objects would
> have B's VID (B has younger VID), and get_vdi_copy_number() returns
> copy number based on VID part of OID. It introduces inconsistency.
>
> So I think you should forbid changing a number of copies when VDI has
> a parent or children. The changing would be similar to exporting VDI
> with qemu-img once and backporting with different number of
> copies. But the procedure requires lots of temporal space for storing
> disk image (which can be few TBs), so preparing a new command which
> can change a number of copies without requireing temporal space would
> be worthful.
Thanks for your opinion.
I have just submitted a solution.
First, as you advised, changing the replica number is now forbidden when
the VDI has a parent or children.
Second, I added a -R (--root) option to the vdi clone command. With it we
can deep-copy a VDI into a standalone VDI that has neither parent nor
children, so that we can run "dog vdi copies" on it later.
>
> Thanks,
> Hitoshi
>
>> diff --git a/dog/cluster.c b/dog/cluster.c
>> index 4af1e7c..6dd2ca6 100644
>> --- a/dog/cluster.c
>> +++ b/dog/cluster.c
>> @@ -545,6 +545,77 @@ static int cluster_check(int argc, char **argv)
>>   	return EXIT_SUCCESS;
>>   }
>>   
>> +#define SET_COPIES_PRINT				\
>> +	"    __\n"				\
>> +	"   ()'`;\n"				\
>> +	"   /\\|`  Caution! Changing the # of replica will affect\n"	\
>> +	"  /  |   all the VDIs to be created later.\n" \
>> +	"(/_)_|_  Are you sure you want to continue? [yes/no]: "
>> +
>> +static int cluster_copies(int argc, char **argv)
>> +{
>> +	int ret, log_length;
>> +	struct sd_req hdr;
>> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>> +	struct epoch_log *logs;
>> +
>> +	cluster_cmd_data.copies = parse_copy(argv[optind],
>> +				&cluster_cmd_data.copy_policy);
>> +	if (cluster_cmd_data.copy_policy != 0) {
>> +		sd_err("changing the replica number to erasure code is not supported yet.");
>> +		return EXIT_USAGE;
>> +	}
>> +	if (cluster_cmd_data.copies <= 0) {
>> +		sd_err("invalid replica number.");
>> +		return EXIT_USAGE;
>> +	}
>> +
>> +	log_length = sd_epoch * sizeof(struct epoch_log);
>> +	logs = xmalloc(log_length);
>> +	sd_init_req(&hdr, SD_OP_STAT_CLUSTER);
>> +	hdr.data_length = log_length;
>> +	ret = dog_exec_req(&sd_nid, &hdr, logs);
>> +	if (ret < 0)
>> +		goto failure;
>> +
>> +	if (rsp->result != SD_RES_SUCCESS) {
>> +		sd_err("Response's result: %s", sd_strerror(rsp->result));
>> +		goto failure;
>> +	}
>> +	if (logs->copy_policy) {
>> +		sd_err("the cluster's copy policy is erasure code, "
>> +			   "changing it is not supported yet.");
>> +		goto failure;
>> +	}
>> +	if (logs->nr_copies == cluster_cmd_data.copies) {
>> +		sd_err("the cluster's replica number is already set to %d.",
>> +				cluster_cmd_data.copies);
>> +		goto failure;
>> +	}
>> +
>> +	confirm(SET_COPIES_PRINT);
>> +
>> +	sd_init_req(&hdr, SD_OP_SET_CLUSTER_COPIES);
>> +	hdr.cluster.copies = cluster_cmd_data.copies;
>> +	hdr.cluster.copy_policy = cluster_cmd_data.copy_policy;
>> +	ret = send_light_req(&sd_nid, &hdr);
>> +	if (ret == 0) {
>> +		sd_info("the cluster's replica number is set to %d, the old one was %d.",
>> +				cluster_cmd_data.copies, logs->nr_copies);
>> +		goto success;
>> +	} else {
>> +		sd_err("set the cluster's replica number failure.");
>> +		goto failure;
>> +	}
>> +
>> +success:
>> +	free(logs);
>> +	return EXIT_SUCCESS;
>> +failure:
>> +	free(logs);
>> +	return EXIT_FAILURE;
>> +}
>> +
>>   static struct subcommand cluster_cmd[] = {
>>   	{"info", NULL, "aprhs", "show cluster information",
>>   	 NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},
>> @@ -563,6 +634,8 @@ static struct subcommand cluster_cmd[] = {
>>   	 cluster_reweight, cluster_options},
>>   	{"check", NULL, "aph", "check and repair cluster", NULL,
>>   	 CMD_NEED_NODELIST, cluster_check, cluster_options},
>> +	{"copies", "<num>", "aph", "set the cluster's replica number", NULL,
>> +	 CMD_NEED_ARG|CMD_NEED_NODELIST, cluster_copies, cluster_options},
>>   	{NULL,},
>>   };
>>   
>> diff --git a/dog/vdi.c b/dog/vdi.c
>> index 4d7fd54..f8f2421 100644
>> --- a/dog/vdi.c
>> +++ b/dog/vdi.c
>> @@ -2346,6 +2346,76 @@ static int vdi_cache(int argc, char **argv)
>>   	return do_generic_subcommand(vdi_cache_cmd, argc, argv);
>>   }
>>   
>> +#define SET_COPIES_PRINT				\
>> +	"    __\n"				\
>> +	"   ()'`;\n"				\
>> +	"   /\\|`  Caution! Changing the # of replica will affect\n"	\
>> +	"  /  |   the specified VDI and trigger recovery.\n" \
>> +	"(/_)_|_  Are you sure you want to continue? [yes/no]: "
>> +
>> +static int vdi_copies(int argc, char **argv)
>> +{
>> +	int ret, old_nr_copies;
>> +	uint32_t vid;
>> +	const char *vdiname = argv[optind++];
>> +	char buf[SD_INODE_HEADER_SIZE];
>> +	struct sd_inode *inode = (struct sd_inode *)buf;
>> +	struct sd_req hdr;
>> +
>> +	vdi_cmd_data.nr_copies = parse_copy(argv[optind],
>> +				&vdi_cmd_data.copy_policy);
>> +	if (vdi_cmd_data.copy_policy != 0) {
>> +		sd_err("changing the replica number to erasure code is not supported yet.");
>> +		return EXIT_USAGE;
>> +	}
>> +	if (vdi_cmd_data.nr_copies <= 0) {
>> +		sd_err("invalid replica number.");
>> +		return EXIT_USAGE;
>> +	}
>> +
>> +	ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE);
>> +	if (ret != EXIT_SUCCESS) {
>> +		sd_err("read %s's vdi object failure.", vdiname);
>> +		return EXIT_FAILURE;
>> +	}
>> +	if (inode->copy_policy) {
>> +		sd_err("%s's copy policy is erasure code, "
>> +			   "changing it is not supported yet.", vdiname);
>> +		return EXIT_FAILURE;
>> +	}
>> +	old_nr_copies = inode->nr_copies;
>> +	if (old_nr_copies == vdi_cmd_data.nr_copies) {
>> +		sd_err("%s's replica number is already set to %d.",
>> +				vdiname, old_nr_copies);
>> +		return EXIT_FAILURE;
>> +	}
>> +
>> +	confirm(SET_COPIES_PRINT);
>> +
>> +	inode->nr_copies = vdi_cmd_data.nr_copies;
>> +	ret = dog_write_object(vid_to_vdi_oid(vid), 0, inode,
>> +			SD_INODE_HEADER_SIZE, 0, 0, old_nr_copies,
>> +			inode->copy_policy, false, true);
>> +	if (ret != SD_RES_SUCCESS) {
>> +		sd_err("overwrite the vdi object's header of %s failure "
>> +			   "while setting its replica number.", vdiname);
>> +		return EXIT_FAILURE;
>> +	}
>> +
>> +	sd_init_req(&hdr, SD_OP_SET_VDI_COPIES);
>> +	hdr.vdi_state.new_vid = vid;
>> +	hdr.vdi_state.copies = vdi_cmd_data.nr_copies;
>> +
>> +	ret = send_light_req(&sd_nid, &hdr);
>> +	if (ret == 0) {
>> +		sd_info("%s's replica number is set to %d, the old one was %d.",
>> +				vdiname, vdi_cmd_data.nr_copies, old_nr_copies);
>> +		return EXIT_SUCCESS;
>> +	}
>> +	sd_err("set %s's replica number failure.", vdiname);
>> +	return EXIT_FAILURE;
>> +}
>> +
>>   static struct subcommand vdi_cmd[] = {
>>   	{"check", "<vdiname>", "saph", "check and repair image's consistency",
>>   	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
>> @@ -2402,6 +2472,9 @@ static struct subcommand vdi_cmd[] = {
>>   	{"cache", "<vdiname>", "saph", "Run 'dog vdi cache' for more information",
>>   	 vdi_cache_cmd, CMD_NEED_ARG,
>>   	 vdi_cache, vdi_options},
>> +	{"copies", "<vdiname> <num>", "aph", "set the vdi's replica number",
>> +	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
>> +	 vdi_copies, vdi_options},
>>   	{NULL,},
>>   };
>>   
>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>> index 0eb7227..4e95e55 100644
>> --- a/include/internal_proto.h
>> +++ b/include/internal_proto.h
>> @@ -101,6 +101,8 @@
>>   #define SD_OP_NFS_DELETE	0xBC
>>   #define SD_OP_EXIST	0xBD
>>   #define SD_OP_CLUSTER_INFO	0xBE
>> +#define SD_OP_SET_CLUSTER_COPIES	0xC0
>> +#define SD_OP_SET_VDI_COPIES	0xC1
>>   
>>   /* internal flags for hdr.flags, must be above 0x80 */
>>   #define SD_FLAG_CMD_RECOVERY 0x0080
>> diff --git a/sheep/ops.c b/sheep/ops.c
>> index b9550f0..523dfbc 100644
>> --- a/sheep/ops.c
>> +++ b/sheep/ops.c
>> @@ -714,6 +714,35 @@ static int cluster_recovery_completion(const struct sd_req *req,
>>   	return SD_RES_SUCCESS;
>>   }
>>   
>> +static int cluster_set_cluster_copies(const struct sd_req *req,
>> +			struct sd_rsp *rsp, void *data)
>> +{
>> +	if (req->cluster.copy_policy != 0)
>> +		return SD_RES_INVALID_PARMS;
>> +
>> +	sys->cinfo.nr_copies = req->cluster.copies;
>> +	return set_cluster_config(&sys->cinfo);
>> +}
>> +
>> +static int cluster_set_vdi_copies(const struct sd_req *req,
>> +			struct sd_rsp *rsp, void *data)
>> +{
>> +	if (req->cluster.copy_policy != 0)
>> +		return SD_RES_INVALID_PARMS;
>> +
>> +	uint32_t vid = req->vdi_state.new_vid;
>> +	int nr_copies = req->vdi_state.copies;
>> +	struct vnode_info *vinfo;
>> +
>> +	add_vdi_state(vid, nr_copies, false, 0);
>> +
>> +	vinfo = get_vnode_info();
>> +	start_recovery(vinfo, vinfo, false);
>> +	put_vnode_info(vinfo);
>> +
>> +	return SD_RES_SUCCESS;
>> +}
>> +
>>   static bool node_size_varied(void)
>>   {
>>   	uint64_t new, used, old = sys->this_node.space;
>> @@ -1179,6 +1208,20 @@ static struct sd_op_template sd_ops[] = {
>>   		.process_main = cluster_disable_recover,
>>   	},
>>   
>> +	[SD_OP_SET_CLUSTER_COPIES] = {
>> +		.name = "SET_CLUSTER_COPIES",
>> +		.type = SD_OP_TYPE_CLUSTER,
>> +		.is_admin_op = true,
>> +		.process_main = cluster_set_cluster_copies,
>> +	},
>> +
>> +	[SD_OP_SET_VDI_COPIES] = {
>> +		.name = "SET_VDI_COPIES",
>> +		.type = SD_OP_TYPE_CLUSTER,
>> +		.is_admin_op = true,
>> +		.process_main = cluster_set_vdi_copies,
>> +	},
>> +
>>   	/* local operations */
>>   	[SD_OP_RELEASE_VDI] = {
>>   		.name = "RELEASE_VDI",
>> -- 
>> 1.8.3.2
>>
>>
>> -- 
>> sheepdog mailing list
>> sheepdog at lists.wpkg.org
>> http://lists.wpkg.org/mailman/listinfo/sheepdog





More information about the sheepdog mailing list