[sheepdog] [PATCH 2/2] sheep: optimize vdi check

Yunkai Zhang yunkai.me at gmail.com
Wed Aug 15 20:36:46 CEST 2012


From: Yunkai Zhang <qiushu.zyk at taobao.com>


Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
---
 collie/vdi.c             | 79 +++++++++++++-----------------------------------
 include/internal_proto.h |  1 +
 include/sheep.h          | 16 ++++++++++
 sheep/farm/farm.c        |  2 ++
 sheep/farm/farm.h        |  1 -
 sheep/farm/sha1_file.c   | 16 ----------
 sheep/ops.c              | 73 ++++++++++++++++++++++++++++++++++++++++++++
 sheep/sheep_priv.h       |  1 +
 8 files changed, 114 insertions(+), 75 deletions(-)

diff --git a/collie/vdi.c b/collie/vdi.c
index e4f4f65..e34a2ba 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -1304,50 +1304,29 @@ out:
 	return ret;
 }
 
-static void *read_object_from(struct sd_vnode *vnode, uint64_t oid)
+static void get_obj_checksum_from(struct sd_vnode *vnode, uint64_t oid,
+				  unsigned char *sha1)
 {
 	struct sd_req hdr;
 	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
-	int fd, ret;
-	unsigned wlen = 0, rlen = SD_DATA_OBJ_SIZE;
-	char name[128];
-	void *buf;
+	char host[128];
+	int ret;
 
-	buf = malloc(SD_DATA_OBJ_SIZE);
-	if (!buf) {
-		fprintf(stderr, "Failed to allocate memory\n");
-		exit(EXIT_SYSFAIL);
-	}
+	addr_to_str(host, sizeof(host), vnode->nid.addr, 0);
 
-	addr_to_str(name, sizeof(name), vnode->nid.addr, 0);
-	fd = connect_to(name, vnode->nid.port);
-	if (fd < 0) {
-		fprintf(stderr, "failed to connect to %s:%"PRIu32"\n",
-			name, vnode->nid.port);
-		exit(EXIT_FAILURE);
-	}
-
-	sd_init_req(&hdr, SD_OP_READ_PEER);
+	sd_init_req(&hdr, SD_OP_CALC_CHKSUM_PEER);
 	hdr.epoch = sd_epoch;
-	hdr.flags = 0;
-	hdr.data_length = rlen;
-
 	hdr.obj.oid = oid;
 
-	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
-	close(fd);
-
-	if (ret) {
-		fprintf(stderr, "Failed to execute request\n");
-		exit(EXIT_FAILURE);
-	}
-
-	if (rsp->result != SD_RES_SUCCESS) {
+	fprintf(stderr, "oid:%"PRIx64", host:%s, port:%d\n", oid, host, vnode->nid.port);
+	ret = send_light_req(&hdr, host, vnode->nid.port);
+	if (ret || rsp->result != SD_RES_SUCCESS) {
 		fprintf(stderr, "Failed to read, %s\n",
 			sd_strerror(rsp->result));
 		exit(EXIT_FAILURE);
 	}
-	return buf;
+
+	memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN);
 }
 
 static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
@@ -1388,39 +1367,23 @@ static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
 	}
 }
 
-/*
- * Fix consistency of the replica of oid.
- *
- * XXX: The fix is rather dumb, just read the first copy and write it
- * to other replica.
- */
-static void do_check_repair(uint64_t oid, int nr_copies)
+static void do_check(uint64_t oid, int nr_copies)
 {
 	struct sd_vnode *tgt_vnodes[nr_copies];
-	void *buf, *buf_cmp;
+	unsigned char sha1[SD_MAX_COPIES][SHA1_LEN];
+	char hex[SD_MAX_COPIES][2*SHA1_LEN+1];
 	int i;
 
 	oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes);
-	buf = read_object_from(tgt_vnodes[0], oid);
-	for (i = 1; i < nr_copies; i++) {
-		buf_cmp = read_object_from(tgt_vnodes[i], oid);
-		if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) {
-			free(buf_cmp);
-			goto fix_consistency;
-		}
-		free(buf_cmp);
+	for (i = 0; i < nr_copies; i++) {
+		get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]);
+		strncpy(hex[i], sha1_to_hex(sha1[i]), 2*SHA1_LEN+1);
+		dprintf("oid%"PRIx64", sha1:%s\n", oid, hex[i]);
 	}
-	free(buf);
 	return;
-
-fix_consistency:
-	for (i = 1; i < nr_copies; i++)
-		write_object_to(tgt_vnodes[i], oid, buf);
-	fprintf(stdout, "fix %"PRIx64" success\n", oid);
-	free(buf);
 }
 
-static int check_repair_vdi(uint32_t vid)
+static int check_vdi(uint32_t vid)
 {
 	struct sheepdog_inode *inode;
 	int ret;
@@ -1443,7 +1406,7 @@ static int check_repair_vdi(uint32_t vid)
 		dvid = inode->data_vdi_id[idx];
 		if (dvid) {
 			oid = vid_to_data_oid(dvid, idx);
-			do_check_repair(oid, inode->nr_copies);
+			do_check(oid, inode->nr_copies);
 		}
 		done += SD_DATA_OBJ_SIZE;
 		idx++;
@@ -1466,7 +1429,7 @@ static int vdi_check(int argc, char **argv)
 		goto out;
 	}
 
-	ret = check_repair_vdi(vid);
+	ret = check_vdi(vid);
 	if (ret != EXIT_SUCCESS)
 		goto out;
 
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 83d98f1..6d46157 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -63,6 +63,7 @@
 #define SD_OP_ENABLE_RECOVER 0xA8
 #define SD_OP_DISABLE_RECOVER 0xA9
 #define SD_OP_INFO_RECOVER 0xAA
+#define SD_OP_CALC_CHKSUM_PEER 0xAB
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/include/sheep.h b/include/sheep.h
index 719d18f..54b6eb3 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -279,4 +279,20 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
 	return nr_vnodes;
 }
 
+/* Hex-format a SHA1 digest into a static buffer that each call overwrites. */
+static inline char *sha1_to_hex(const unsigned char *sha1)
+{
+	static const char digits[] = "0123456789abcdef";
+	static char buffer[50];
+	int i;
+
+	for (i = 0; i < SHA1_LEN; i++) {
+		buffer[2 * i] = digits[sha1[i] >> 4];
+		buffer[2 * i + 1] = digits[sha1[i] & 0xf];
+	}
+
+	buffer[2 * SHA1_LEN] = 0;
+	return buffer;
+}
+
 #endif
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 7eeae9a..bfc0c84 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -508,6 +508,8 @@ static int farm_read(uint64_t oid, struct siocb *iocb)
 	}
 	if (size != iocb->length) {
 		ret = SD_RES_EIO;
+		dprintf("oid:%"PRIx64", offset:%lu, length:%d, size:%zd\n", oid,
+		iocb->offset, iocb->length, size);
 		goto out;
 	}
 out:
diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
index 27e65cd..d0b635a 100644
--- a/sheep/farm/farm.h
+++ b/sheep/farm/farm.h
@@ -53,7 +53,6 @@ extern char farm_obj_dir[PATH_MAX];
 extern char *sha1_to_path(const unsigned char *sha1);
 extern int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *outsha1);
 extern void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *hdr);
-extern char *sha1_to_hex(const unsigned char *sha1);
 extern int get_sha1_hex(const char *hex, unsigned char *sha1);
 extern int sha1_file_try_delete(const unsigned char *sha1);
 
diff --git a/sheep/farm/sha1_file.c b/sheep/farm/sha1_file.c
index 493ca79..3cd4f40 100644
--- a/sheep/farm/sha1_file.c
+++ b/sheep/farm/sha1_file.c
@@ -257,19 +257,3 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
 	}
 	return 0;
 }
-
-char *sha1_to_hex(const unsigned char *sha1)
-{
-	static char buffer[50];
-	static const char hex[] = "0123456789abcdef";
-	char *buf = buffer;
-	int i;
-
-	for (i = 0; i < SHA1_LEN; i++) {
-		unsigned int val = *sha1++;
-		*buf++ = hex[val >> 4];
-		*buf++ = hex[val & 0xf];
-	}
-	buffer[2 * SHA1_LEN] = 0;
-	return buffer;
-}
diff --git a/sheep/ops.c b/sheep/ops.c
index 8ca8748..1c3abea 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -26,6 +26,7 @@
 #include "strbuf.h"
 #include "trace/trace.h"
 #include "util.h"
+#include "sha1.h"
 
 enum sd_op_type {
 	SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
@@ -748,6 +749,70 @@ out:
 	return ret;
 }
 
+/*
+ * Return the SHA1 of a whole object in the padding of the response header.
+ */
+int peer_calc_obj_chksum(struct request *req)
+{
+	struct sd_req *hdr = &req->rq;
+	struct sd_rsp *rsp = &req->rp;
+	uint32_t epoch = hdr->epoch;
+	unsigned char sha1[SHA1_LEN];
+	struct siocb iocb;
+	struct sha1_ctx ctx;
+	void *buf;
+	int ret;
+
+	if (sys->gateway_only)
+		return SD_RES_NO_OBJ;
+
+	assert(req->data_length == 0);
+	assert(hdr->data_length == 0);
+
+	hdr->data_length = SD_DATA_OBJ_SIZE;
+	if (is_vdi_obj(hdr->obj.oid))
+		hdr->data_length = SD_INODE_SIZE;
+
+	buf = xmalloc(hdr->data_length);
+	req->data = buf;
+	hdr->obj.offset = 0;
+
+	if (sys->enable_write_cache && !req->local
+	    && !bypass_object_cache(req)) {
+		ret = object_cache_handle_request(req);
+		if (ret != SD_RES_SUCCESS)
+			goto out;
+		goto checksum;
+	}
+
+	memset(&iocb, 0, sizeof(iocb));
+	iocb.epoch = epoch;
+	iocb.flags = hdr->flags;
+	iocb.offset = hdr->obj.offset;
+	iocb.length = hdr->data_length;
+	iocb.buf = buf;
+
+	ret = sd_store->read(hdr->obj.oid, &iocb);
+	dprintf("oid:%"PRIx64", offset:%lu, length:%d, ret:%d\n", hdr->obj.oid,
+		iocb.offset, iocb.length, ret);
+	if (ret != SD_RES_SUCCESS)
+		goto out;
+checksum:
+	/* req->data_length was asserted to be 0; buf holds hdr->data_length. */
+	sha1_init(&ctx);
+	sha1_update(&ctx, buf, hdr->data_length);
+	sha1_final(&ctx, sha1);
+	memcpy(&rsp->__pad[0], sha1, SHA1_LEN);
+	dprintf("oid:%"PRIx64", sha1:%s\n", hdr->obj.oid, sha1_to_hex(sha1));
+out:
+	req->data = NULL;
+	req->data_length = 0;
+	hdr->data_length = 0;
+	rsp->data_length = 0;
+	free(buf);
+	return ret;
+}
+
 static int do_write_obj(struct siocb *iocb, struct sd_req *hdr, uint32_t epoch,
 		void *data, int create)
 {
@@ -1084,11 +1149,19 @@ static struct sd_op_template sd_ops[] = {
 		.type = SD_OP_TYPE_CLUSTER,
 		.process_main = cluster_disable_recover,
 	},
+
 	[SD_OP_INFO_RECOVER] = {
 		.name = "INFO_RECOVER",
 		.type = SD_OP_TYPE_LOCAL,
 		.process_main = local_info_recover,
 	},
+
+	[SD_OP_CALC_CHKSUM_PEER] = {
+		.name = "CALC_CHKSUM_PEER",
+		.type = SD_OP_TYPE_PEER,
+		.process_work = peer_calc_obj_chksum,
+	},
+
 };
 
 struct sd_op_template *get_sd_op(uint8_t opcode)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 857cf87..9ef22f7 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -325,6 +325,7 @@ int peer_read_obj(struct request *req);
 int peer_write_obj(struct request *req);
 int peer_create_and_write_obj(struct request *req);
 int peer_remove_obj(struct request *req);
+int peer_calc_obj_chksum(struct request *req);
 
 /* object_cache */
 
-- 
1.7.11.4




More information about the sheepdog mailing list