[sheepdog] [PATCH V3] collie: optimize 'collie vdi check' command

Liu Yuan namei.unix at gmail.com
Tue Aug 28 14:44:41 CEST 2012


On 08/28/2012 07:42 PM, Yunkai Zhang wrote:
> 
> User can do force repair by specify -R or --repair flag:
> * Force repair:
> $ collie vdi check -R test.img
> CHECKING VDI:test.img ...
> Failed oid: 9c5e6800000001
>>> copy[0], sha1: c78ca69c4be7401b6d1f11a37a4cec4226e736cd, from: 127.0.0.1:7000
>>> copy[1], sha1: 46dbc769de60a508faf134c6d51926741c0e38fa, from: 127.0.0.1:7001
>>> copy[2], sha1: c78ca69c4be7401b6d1f11a37a4cec4226e736cd, from: 127.0.0.1:7004
>>> Repaired successfully!
> REPAIRED

I suspect that user might want to fix the consistency with 'check command' running once like fsck,
so maybe --no-repair as an option is better.

> 
> diff --git a/collie/common.c b/collie/common.c
> index ce8dcf7..dd54ae4 100644
> --- a/collie/common.c
> +++ b/collie/common.c
> @@ -214,8 +214,7 @@ int send_light_req_get_response(struct sd_req *hdr, const char *host, int port)
>  	ret = exec_req(fd, hdr, NULL, &wlen, &rlen);
>  	close(fd);
>  	if (ret) {
> -		fprintf(stderr, "failed to connect to  %s:%d\n",
> -			host, port);
> +		dprintf("failed to connect to  %s:%d\n", host, port);

This is used only by collie, so fpritnf(stderr...) is better than dprintf, which print out useless 
line number.

>  		return -1;
>  	}
>  
> @@ -236,8 +235,7 @@ int send_light_req(struct sd_req *hdr, const char *host, int port)
>  		return -1;
>  
>  	if (ret != SD_RES_SUCCESS) {
> -		fprintf(stderr, "Response's result: %s\n",
> -			sd_strerror(ret));
> +		dprintf("Response's result: %s\n", sd_strerror(ret));

ditto.

> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index c1d116a..96fee75 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -61,10 +61,14 @@
>  #define SD_OP_REMOVE_PEER    0xA6
>  #define SD_OP_SET_CACHE_SIZE 0xA7
>  #define SD_OP_ENABLE_RECOVER 0xA8
> -#define SD_OP_DISABLE_RECOVER 0xA9
> -#define SD_OP_INFO_RECOVER 0xAA
> -#define SD_OP_GET_VDI_COPIES 0xAB
> +#define SD_OP_DISABLE_RECOVER  0xA9
> +#define SD_OP_INFO_RECOVER     0xAA
> +#define SD_OP_GET_VDI_COPIES   0xAB
>  #define SD_OP_COMPLETE_RECOVERY 0xAC
> +#define SD_OP_CALC_CHKSUM      0xAD
> +#define SD_OP_CALC_CHKSUM_PEER 0xAE
> +#define SD_OP_REPAIR_OBJ       0xAF
> +#define SD_OP_REPAIR_OBJ_PEER  0xB0
>  
>  /* internal flags for hdr.flags, must be above 0x80 */
>  #define SD_FLAG_CMD_RECOVERY 0x0080
> @@ -169,6 +173,20 @@ struct sd_node_rsp {
>  	uint64_t	store_free;
>  };
>  
> +struct sd_checksum_rsp {
> +	uint8_t		proto_ver;
> +	uint8_t		opcode;
> +	uint16_t	flags;
> +	uint32_t	epoch;
> +	uint32_t	id;
> +	uint32_t	data_length;
> +	uint32_t	result;
> +	union {
> +		uint8_t		sha1[SHA1_LEN];
> +		uint32_t	__pad[7];
> +	};
> +};
> +
>  struct node_id {
>  	uint8_t addr[16];
>  	uint16_t port;
> diff --git a/include/sheep.h b/include/sheep.h
> index fbbab85..a97cb43 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -284,4 +284,20 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
>  	return nr_vnodes;
>  }
>  
> +static inline char *sha1_to_hex(const unsigned char *sha1)
> +{
> +	static char buffer[50];
> +	static const char hex[] = "0123456789abcdef";
> +	char *buf = buffer;
> +	int i;
> +
> +	for (i = 0; i < SHA1_LEN; i++) {
> +		unsigned int val = *sha1++;
> +		*buf++ = hex[val >> 4];
> +		*buf++ = hex[val & 0xf];
> +	}
> +	buffer[2 * SHA1_LEN] = 0;
> +	return buffer;
> +}
> +

We can't move it out because both buffer and hex is static variable. And we'd better to keep
it in sha1_file.c and export it out as a function.

>  #endif
> diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
> index 1a0081e..da79159 100644
> --- a/sheep/farm/farm.h
> +++ b/sheep/farm/farm.h
> @@ -46,7 +46,6 @@ extern char farm_obj_dir[PATH_MAX];
>  extern char *sha1_to_path(const unsigned char *sha1);
>  extern int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *);
>  extern void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *);
> -extern char *sha1_to_hex(const unsigned char *sha1);
>  extern int get_sha1_hex(const char *hex, unsigned char *sha1);
>  extern int sha1_file_try_delete(const unsigned char *sha1);
>  
> diff --git a/sheep/farm/sha1_file.c b/sheep/farm/sha1_file.c
> index b0abc16..3cd4f40 100644
> --- a/sheep/farm/sha1_file.c
> +++ b/sheep/farm/sha1_file.c
> @@ -257,18 +257,3 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
>  	}
>  	return 0;
>  }
> -
> -char *sha1_to_hex(const unsigned char *sha1)
> -{
> -	static char buffer[50];
> -	static const char hex[] = "0123456789abcdef";
> -	char *buf = buffer;
> -	int i;
> -
> -	for (i = 0; i < SHA1_LEN; i++) {
> -		unsigned int val = *sha1++;
> -		*buf++ = hex[val >> 4];
> -		*buf++ = hex[val & 0xf];
> -	}
> -	return buffer;
> -}
> diff --git a/sheep/gateway.c b/sheep/gateway.c
> index 41d712b..528c912 100644
> --- a/sheep/gateway.c
> +++ b/sheep/gateway.c
> @@ -316,3 +316,119 @@ int gateway_remove_obj(struct request *req)
>  {
>  	return gateway_forward_request(req);
>  }
> +
> +int gateway_calc_obj_chksum(struct request *req)
> +{
> +	struct sd_req fwdhdr, *hdr = &req->rq;
> +	struct sd_checksum_rsp *rsp;
> +	struct sd_vnode *vnodes[SD_MAX_COPIES];
> +	struct node_id *nid;
> +	char host[128];
> +	unsigned int rlen, wlen;
> +	uint64_t oid = hdr->obj.oid;
> +	int fd, ret, nr_objs;
> +	int i = 0, offset = 0;
> +
> +	nr_objs = get_obj_copy_number(oid, req->vinfo->nr_zones);
> +
> +	oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes,
> +		      oid, nr_objs, vnodes);
> +
> +loop:
> +	nid = &vnodes[i]->nid;
> +	addr_to_str(host, sizeof(host), nid->addr, 0);
> +
> +	fd = connect_to(host, nid->port);
> +	if (fd < 0) {
> +		dprintf("Failed to connect\n");
> +		return SD_RES_EIO;
> +	}
> +
> +	rlen = 0;
> +	wlen = 0;
> +
> +	memcpy(&fwdhdr, hdr, sizeof(fwdhdr));
> +	fwdhdr.opcode = gateway_to_peer_opcode(hdr->opcode);
> +	fwdhdr.flags = 0;
> +	fwdhdr.data_length = 0;
> +
> +	rsp = (struct sd_checksum_rsp *)&fwdhdr;
> +	ret = exec_req(fd, &fwdhdr, NULL, &wlen, &rlen);
> +	close(fd);
> +	if (ret)
> +		return SD_RES_EIO;
> +
> +	if (rsp->result == SD_RES_SUCCESS)
> +		memcpy((char *)req->data + offset, rsp->sha1, SHA1_LEN);
> +	else if (rsp->result == SD_RES_NO_OBJ)
> +		memset((char *)req->data + offset, 0, SHA1_LEN);
> +	else
> +		return rsp->result;
> +
> +	memcpy((char *)req->data + offset + SHA1_LEN, nid, sizeof(*nid));
> +	offset += SHA1_LEN + sizeof(*nid);
> +
> +	if (++i < nr_objs)
> +		goto loop;
> +
> +	req->rp.obj.copies = nr_objs;
> +	req->rp.data_length = offset;
> +
> +	return SD_RES_SUCCESS;
> +}
> +
> +int gateway_repair_obj(struct request *req)
> +{
> +	struct sd_req fwdhdr, *hdr = &req->rq;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&fwdhdr;
> +	struct node_id *src, *dest;
> +	struct sd_vnode *vnodes[SD_MAX_COPIES];
> +	char host[128], to[128];
> +	uint64_t oid = hdr->obj.oid;
> +	unsigned rlen, wlen;
> +	int i, fd, ret, err_ret, nr_objs;
> +
> +	nr_objs = get_obj_copy_number(oid, req->vinfo->nr_zones);
> +
> +	oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes,
> +		      oid, nr_objs, vnodes);
> +
> +	i = 1;
> +	src = &vnodes[0]->nid;
> +	err_ret = SD_RES_SUCCESS;
> +loop:
> +	dest = &vnodes[i]->nid;
> +	addr_to_str(host, sizeof(host), dest->addr, 0);
> +
> +	fd = connect_to(host, dest->port);
> +	if (fd < 0) {
> +		dprintf("Failed to connect\n");
> +		return SD_RES_EIO;
> +	}
> +
> +	rlen = 0;
> +	wlen = sizeof(*src);
> +
> +	memcpy(&fwdhdr, hdr, sizeof(fwdhdr));
> +	fwdhdr.opcode = gateway_to_peer_opcode(hdr->opcode);
> +	fwdhdr.flags = 0;
> +	fwdhdr.data_length = wlen;
> +	fwdhdr.flags = SD_FLAG_CMD_WRITE;
> +
> +	ret = exec_req(fd, &fwdhdr, src, &wlen, &rlen);
> +	close(fd);
> +	if (ret)
> +		return SD_RES_EIO;
> +
> +	if (rsp->result != SD_RES_SUCCESS) {
> +		dprintf("oid:%"PRIx64", from:%s to:%s\n", oid,
> +			addr_to_str(host, sizeof(host), src->addr, src->port),
> +			addr_to_str(to, sizeof(to), dest->addr, dest->port));
> +		err_ret = rsp->result;
> +	}
> +
> +	if (++i < nr_objs)
> +		goto loop;
> +
> +	return err_ret;
> +}

I think both two functions should use sockfd and we might actually share the forward_gateway_request()
by modifying its interface,

forward_gateway_request(req, func_pointer)

thus we can pass wait_forward_request() to it for write/remove/create_and_write/repair, and add a new
function for cacl_object_chksum. In this way, we can boost requests handling in parallel. 

> diff --git a/sheep/ops.c b/sheep/ops.c
> index ccb1c5e..3225c59 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -26,6 +26,7 @@
>  #include "strbuf.h"
>  #include "trace/trace.h"
>  #include "util.h"
> +#include "sha1.h"
>  
>  enum sd_op_type {
>  	SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
> @@ -219,7 +220,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
>  	int i, ret;
>  	uint32_t latest_epoch;
>  	uint64_t created_time;
> -	struct siocb iocb = { 0 };
>  	struct store_driver *driver;
>  
>  	driver = find_store_driver(data);
> @@ -228,7 +228,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
>  
>  	sd_store = driver;
>  	latest_epoch = get_latest_epoch();
> -	iocb.epoch = latest_epoch;
>  
>  	ret = sd_store->format(data);
>  	if (ret != SD_RES_SUCCESS)
> @@ -688,6 +687,51 @@ static int local_kill_node(const struct sd_req *req, struct sd_rsp *rsp,
>  	return SD_RES_SUCCESS;
>  }
>  
> +static inline int read_object_from(struct node_id *src, uint32_t epoch,
> +				   uint64_t oid, char *buf)
> +{
> +	struct sd_req hdr;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +	int fd, ret;
> +	unsigned wlen, rlen;
> +	char host[128];
> +
> +	addr_to_str(host, sizeof(host), src->addr, 0);
> +
> +	fd = connect_to(host, src->port);
> +	if (fd < 0) {
> +		dprintf("failed to connect to %s:%"PRIu32"\n",
> +			host, src->port);
> +		return SD_RES_NETWORK_ERROR;
> +	}
> +

use sockfd cache.

Thanks,
Yuan



More information about the sheepdog mailing list