[sheepdog] [PATCH 1/2] collie: optimize 'collie vdi check' command

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Tue Aug 21 07:42:34 CEST 2012


At Thu, 16 Aug 2012 22:38:21 +0800,
Yunkai Zhang wrote:
> 
> After add '-F' flag, the help looks like:
> $ collie vdi check
> Usage: collie vdi check [-F] [-s snapshot] [-a address] [-p port] [-h] <vdiname>
> Options:
>   -F, --force_repair      force repair object's copies (dangerous)

How about '-r, --repair'?

>  		fprintf(stderr, "Failed to read, %s\n",
>  			sd_strerror(rsp->result));
>  		exit(EXIT_FAILURE);
>  	}
> -	return buf;
> +
> +	memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN);

Please define a member name instead of using __pad.


>  }
>  
> -static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
> +static int do_repair(uint64_t oid, struct node_id *src, struct node_id *dest)
>  {
>  	struct sd_req hdr;
>  	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +	unsigned rlen, wlen;
> +	char host[128];
>  	int fd, ret;
> -	unsigned wlen = SD_DATA_OBJ_SIZE, rlen = 0;
> -	char name[128];
>  
> -	addr_to_str(name, sizeof(name), vnode->nid.addr, 0);
> -	fd = connect_to(name, vnode->nid.port);
> +	addr_to_str(host, sizeof(host), dest->addr, 0);
> +
> +	fd = connect_to(host, dest->port);
>  	if (fd < 0) {
> -		fprintf(stderr, "failed to connect to %s:%"PRIu32"\n",
> -			name, vnode->nid.port);
> -		exit(EXIT_FAILURE);
> +		fprintf(stderr, "Failed to connect\n");
> +		return SD_RES_EIO;
>  	}
>  
> -	sd_init_req(&hdr, SD_OP_WRITE_PEER);
> -	hdr.epoch = sd_epoch;
> -	hdr.flags = SD_FLAG_CMD_WRITE;
> -	hdr.data_length = wlen;
> +	sd_init_req(&hdr, SD_OP_REPAIR_OBJ_PEER);

I don't think sending peer requests directly from outside sheeps is a
good idea.  How about making the gateway node forward the requests?

>  
> +	rlen = 0;
> +	wlen = sizeof(*src);
> +
> +	hdr.epoch = sd_epoch;
>  	hdr.obj.oid = oid;
> +	hdr.data_length = wlen;
> +	hdr.flags = SD_FLAG_CMD_WRITE;
>  
> -	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
> +	ret = exec_req(fd, &hdr, src, &wlen, &rlen);
>  	close(fd);
> -
>  	if (ret) {
> -		fprintf(stderr, "Failed to execute request\n");
> -		exit(EXIT_FAILURE);
> +		fprintf(stderr, "Failed to repair oid:%"PRIx64"\n", oid);
> +		return SD_RES_EIO;
>  	}
> -
>  	if (rsp->result != SD_RES_SUCCESS) {
> -		fprintf(stderr, "Failed to read, %s\n",
> -			sd_strerror(rsp->result));
> -		exit(EXIT_FAILURE);
> +		fprintf(stderr, "Failed to repair oid:%"PRIx64", %s\n",
> +			oid, sd_strerror(rsp->result));
> +		return rsp->result;
>  	}
> +
> +	return SD_RES_SUCCESS;
>  }
>  
> -/*
> - * Fix consistency of the replica of oid.
> - *
> - * XXX: The fix is rather dumb, just read the first copy and write it
> - * to other replica.
> - */
> -static void do_check_repair(uint64_t oid, int nr_copies)
> +static int do_check_repair(uint64_t oid, int nr_copies)
>  {
>  	struct sd_vnode *tgt_vnodes[nr_copies];
> -	void *buf, *buf_cmp;
> -	int i;
> +	unsigned char sha1[SD_MAX_COPIES][SHA1_LEN];
> +	char host[128];
> +	int i, j;
>  
>  	oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes);
> -	buf = read_object_from(tgt_vnodes[0], oid);
> -	for (i = 1; i < nr_copies; i++) {
> -		buf_cmp = read_object_from(tgt_vnodes[i], oid);
> -		if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) {
> -			free(buf_cmp);
> -			goto fix_consistency;
> +	for (i = 0; i < nr_copies; i++) {
> +		get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]);
> +	}
> +
> +	for (i = 0; i < nr_copies; i++) {
> +		for (j = (i + 1); j < nr_copies; j++) {
> +			if (memcmp(sha1[i], sha1[j], SHA1_LEN))
> +				goto diff;
>  		}
> -		free(buf_cmp);
>  	}
> -	free(buf);
> -	return;
> +	return 0;
>  
> -fix_consistency:
> -	for (i = 1; i < nr_copies; i++)
> -		write_object_to(tgt_vnodes[i], oid, buf);
> -	fprintf(stdout, "fix %"PRIx64" success\n", oid);
> -	free(buf);
> +diff:
> +	fprintf(stderr, "Failed oid: %"PRIx64"\n", oid);
> +	for (i = 0; i < nr_copies; i++) {
> +		addr_to_str(host, sizeof(host), tgt_vnodes[i]->nid.addr, 0);
> +		fprintf(stderr, ">> copy[%d], sha1: %s, from: %s:%d\n",
> +			i, sha1_to_hex(sha1[i]), host, tgt_vnodes[i]->nid.port);
> +	}
> +
> +	if (!vdi_cmd_data.force_repair)
> +		return -1;
> +
> +	/*
> +	 * Force repair the consistency of oid's replica
> +	 *
> +	 * FIXME: this fix is rather dumb, it just read the
> +	 * first copy and write it to other replica,
> +	 */
> +	fprintf(stderr, ">> force repairing ...\n");
> +	addr_to_str(host, sizeof(host), tgt_vnodes[0]->nid.addr,
> +		    tgt_vnodes[0]->nid.port);
> +	for (i = 1; i< nr_copies; i++) {

Needs a space before '<'.

> @@ -1466,18 +1474,22 @@ static int vdi_check(int argc, char **argv)
>  		goto out;
>  	}
>  
> +	printf("CHECKING VDI:%s ...\n", vdiname);
>  	ret = check_repair_vdi(vid);
> -	if (ret != EXIT_SUCCESS)
> +	if (ret != EXIT_SUCCESS) {
> +		printf("%s\n",
> +		       vdi_cmd_data.force_repair?"FORCE REPAIRED":"FAILED");

Need spaces around '?' and ':'.

Thanks,

Kazutaka



More information about the sheepdog mailing list