[sheepdog] [PATCH 2/2] sheep: optimize vdi check
Yunkai Zhang
yunkai.me at gmail.com
Wed Aug 15 20:36:46 CEST 2012
From: Yunkai Zhang <qiushu.zyk at taobao.com>
Signed-off-by: Yunkai Zhang <qiushu.zyk at taobao.com>
---
collie/vdi.c | 79 +++++++++++++-----------------------------------
include/internal_proto.h | 1 +
include/sheep.h | 16 ++++++++++
sheep/farm/farm.c | 2 ++
sheep/farm/farm.h | 1 -
sheep/farm/sha1_file.c | 16 ----------
sheep/ops.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
sheep/sheep_priv.h | 1 +
8 files changed, 114 insertions(+), 75 deletions(-)
diff --git a/collie/vdi.c b/collie/vdi.c
index e4f4f65..e34a2ba 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -1304,50 +1304,29 @@ out:
return ret;
}
-static void *read_object_from(struct sd_vnode *vnode, uint64_t oid)
+static void get_obj_checksum_from(struct sd_vnode *vnode, uint64_t oid,
+ unsigned char *sha1) /* Ask vnode's node for the SHA-1 of oid and copy SHA1_LEN bytes into sha1; exits the process on failure. */
{
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
- int fd, ret;
- unsigned wlen = 0, rlen = SD_DATA_OBJ_SIZE;
- char name[128];
- void *buf;
+ char host[128];
+ int ret;
- buf = malloc(SD_DATA_OBJ_SIZE);
- if (!buf) {
- fprintf(stderr, "Failed to allocate memory\n");
- exit(EXIT_SYSFAIL);
- }
+ addr_to_str(host, sizeof(host), vnode->nid.addr, 0); /* host string later passed to send_light_req() */
- addr_to_str(name, sizeof(name), vnode->nid.addr, 0);
- fd = connect_to(name, vnode->nid.port);
- if (fd < 0) {
- fprintf(stderr, "failed to connect to %s:%"PRIu32"\n",
- name, vnode->nid.port);
- exit(EXIT_FAILURE);
- }
-
- sd_init_req(&hdr, SD_OP_READ_PEER);
+ sd_init_req(&hdr, SD_OP_CALC_CHKSUM_PEER); /* ask the peer for a checksum instead of the object data */
hdr.epoch = sd_epoch;
- hdr.flags = 0;
- hdr.data_length = rlen;
-
hdr.obj.oid = oid;
- ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
- close(fd);
-
- if (ret) {
- fprintf(stderr, "Failed to execute request\n");
- exit(EXIT_FAILURE);
- }
-
- if (rsp->result != SD_RES_SUCCESS) {
+ fprintf(stderr, "oid:%"PRIx64", host:%s, port:%d\n", oid, host, vnode->nid.port); /* NOTE(review): unconditional per-object trace on stderr; looks like leftover debugging, confirm */
+ ret = send_light_req(&hdr, host, vnode->nid.port);
+ if (ret || rsp->result != SD_RES_SUCCESS) { /* NOTE(review): rsp aliases hdr, so when ret != 0 rsp->result may not hold a real reply; confirm */
fprintf(stderr, "Failed to read, %s\n",
sd_strerror(rsp->result));
exit(EXIT_FAILURE);
}
- return buf;
+
+ memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN); /* the digest is carried in the response header's __pad area */
}
static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
@@ -1388,39 +1367,23 @@ static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
}
}
-/*
- * Fix consistency of the replica of oid.
- *
- * XXX: The fix is rather dumb, just read the first copy and write it
- * to other replica.
- */
-static void do_check_repair(uint64_t oid, int nr_copies)
+static void do_check(uint64_t oid, int nr_copies) /* Fetch the SHA-1 of each of the nr_copies replicas of oid and log it. */
{
struct sd_vnode *tgt_vnodes[nr_copies];
- void *buf, *buf_cmp;
+ unsigned char sha1[SD_MAX_COPIES][SHA1_LEN]; /* one digest per replica; NOTE(review): assumes nr_copies <= SD_MAX_COPIES, confirm */
+ char hex[SD_MAX_COPIES][2*SHA1_LEN+1]; /* printable form of each digest, including the terminating NUL */
int i;
oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes);
- buf = read_object_from(tgt_vnodes[0], oid);
- for (i = 1; i < nr_copies; i++) {
- buf_cmp = read_object_from(tgt_vnodes[i], oid);
- if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) {
- free(buf_cmp);
- goto fix_consistency;
- }
- free(buf_cmp);
+ for (i = 0; i < nr_copies; i++) {
+ get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]);
+ strncpy(hex[i], sha1_to_hex(sha1[i]), 2*SHA1_LEN+1); /* copy out: sha1_to_hex() returns a static buffer that the next call overwrites */
+ dprintf("oid%"PRIx64", sha1:%s\n", oid, hex[i]); /* NOTE(review): digests are only logged; nothing visible here compares them or reports a mismatch */
}
- free(buf);
return;
-
-fix_consistency:
- for (i = 1; i < nr_copies; i++)
- write_object_to(tgt_vnodes[i], oid, buf);
- fprintf(stdout, "fix %"PRIx64" success\n", oid);
- free(buf);
}
-static int check_repair_vdi(uint32_t vid)
+static int check_vdi(uint32_t vid)
{
struct sheepdog_inode *inode;
int ret;
@@ -1443,7 +1406,7 @@ static int check_repair_vdi(uint32_t vid)
dvid = inode->data_vdi_id[idx];
if (dvid) {
oid = vid_to_data_oid(dvid, idx);
- do_check_repair(oid, inode->nr_copies);
+ do_check(oid, inode->nr_copies);
}
done += SD_DATA_OBJ_SIZE;
idx++;
@@ -1466,7 +1429,7 @@ static int vdi_check(int argc, char **argv)
goto out;
}
- ret = check_repair_vdi(vid);
+ ret = check_vdi(vid);
if (ret != EXIT_SUCCESS)
goto out;
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 83d98f1..6d46157 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -63,6 +63,7 @@
#define SD_OP_ENABLE_RECOVER 0xA8
#define SD_OP_DISABLE_RECOVER 0xA9
#define SD_OP_INFO_RECOVER 0xAA
+#define SD_OP_CALC_CHKSUM_PEER 0xAB
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/include/sheep.h b/include/sheep.h
index 719d18f..54b6eb3 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -279,4 +279,20 @@ static inline int nodes_to_vnodes(struct sd_node *nodes, int nr,
return nr_vnodes;
}
+/* Hex-encode a SHA1_LEN-byte digest; the static result is reused per call. */
+static inline char *sha1_to_hex(const unsigned char *sha1)
+{
+	static const char digits[] = "0123456789abcdef";
+	static char out[50];
+	int n;
+
+	for (n = 0; n < SHA1_LEN; n++) {
+		unsigned int byte = sha1[n];
+		out[2 * n] = digits[byte >> 4];
+		out[2 * n + 1] = digits[byte & 0xf];
+	}
+	out[2 * SHA1_LEN] = 0;
+	return out;
+}
+
#endif
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 7eeae9a..bfc0c84 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -508,6 +508,8 @@ static int farm_read(uint64_t oid, struct siocb *iocb)
}
if (size != iocb->length) {
ret = SD_RES_EIO;
+ dprintf("oid:%"PRIx64", offset:%lu, length:%d, size:%zd\n", oid,
+ iocb->offset, iocb->length, size);
goto out;
}
out:
diff --git a/sheep/farm/farm.h b/sheep/farm/farm.h
index 27e65cd..d0b635a 100644
--- a/sheep/farm/farm.h
+++ b/sheep/farm/farm.h
@@ -53,7 +53,6 @@ extern char farm_obj_dir[PATH_MAX];
extern char *sha1_to_path(const unsigned char *sha1);
extern int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *outsha1);
extern void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *hdr);
-extern char *sha1_to_hex(const unsigned char *sha1);
extern int get_sha1_hex(const char *hex, unsigned char *sha1);
extern int sha1_file_try_delete(const unsigned char *sha1);
diff --git a/sheep/farm/sha1_file.c b/sheep/farm/sha1_file.c
index 493ca79..3cd4f40 100644
--- a/sheep/farm/sha1_file.c
+++ b/sheep/farm/sha1_file.c
@@ -257,19 +257,3 @@ int get_sha1_hex(const char *hex, unsigned char *sha1)
}
return 0;
}
-
-char *sha1_to_hex(const unsigned char *sha1)
-{
- static char buffer[50];
- static const char hex[] = "0123456789abcdef";
- char *buf = buffer;
- int i;
-
- for (i = 0; i < SHA1_LEN; i++) {
- unsigned int val = *sha1++;
- *buf++ = hex[val >> 4];
- *buf++ = hex[val & 0xf];
- }
- buffer[2 * SHA1_LEN] = 0;
- return buffer;
-}
diff --git a/sheep/ops.c b/sheep/ops.c
index 8ca8748..1c3abea 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -26,6 +26,7 @@
#include "strbuf.h"
#include "trace/trace.h"
#include "util.h"
+#include "sha1.h"
enum sd_op_type {
SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
@@ -748,6 +749,70 @@ out:
return ret;
}
+/* SHA-1 the whole object named by the request; digest goes in rsp->__pad. */
+int peer_calc_obj_chksum(struct request *req)
+{
+	struct sd_req *hdr = &req->rq;
+	struct sd_rsp *rsp = &req->rp;
+	uint32_t epoch = hdr->epoch;
+	unsigned char sha1[SHA1_LEN];
+	struct siocb iocb;
+	struct sha1_ctx ctx;
+	void *buf;
+	int ret;
+
+	if (sys->gateway_only)
+		return SD_RES_NO_OBJ;
+
+	assert(req->data_length == 0);
+	assert(hdr->data_length == 0);
+
+	/* Temporarily turn the request into a full-object read at offset 0. */
+	hdr->data_length = SD_DATA_OBJ_SIZE;
+	if (is_vdi_obj(hdr->obj.oid))
+		hdr->data_length = SD_INODE_SIZE;
+
+	buf = xmalloc(hdr->data_length);
+	req->data = buf;
+	hdr->obj.offset = 0;
+
+	if (sys->enable_write_cache && !req->local
+	    && !bypass_object_cache(req)) {
+		ret = object_cache_handle_request(req);
+		if (ret == SD_RES_SUCCESS)
+			goto checksum;
+		goto out;
+	}
+
+	memset(&iocb, 0, sizeof(iocb));
+	iocb.epoch = epoch;
+	iocb.flags = hdr->flags;
+	iocb.offset = hdr->obj.offset;
+	iocb.length = hdr->data_length;
+	iocb.buf = buf;
+
+	ret = sd_store->read(hdr->obj.oid, &iocb);
+	dprintf("oid:%"PRIx64", offset:%lu, length:%d, ret:%d\n", hdr->obj.oid,
+		iocb.offset, iocb.length, ret);
+	if (ret != SD_RES_SUCCESS)
+		goto out;
+checksum:
+	/* req->data_length is 0 here (asserted above); hash the bytes read. */
+	sha1_init(&ctx);
+	sha1_update(&ctx, buf, hdr->data_length);
+	sha1_final(&ctx, sha1);
+	memcpy(&rsp->__pad[0], sha1, SHA1_LEN);
+	dprintf("oid:%"PRIx64", sha1:%s\n", hdr->obj.oid, sha1_to_hex(sha1));
+out:
+	/* Drop the scratch buffer and reset every data_length back to 0. */
+	req->data = NULL;
+	req->data_length = 0;
+	hdr->data_length = 0;
+	rsp->data_length = 0;
+	free(buf);
+	return ret;
+}
+
static int do_write_obj(struct siocb *iocb, struct sd_req *hdr, uint32_t epoch,
void *data, int create)
{
@@ -1084,11 +1149,19 @@ static struct sd_op_template sd_ops[] = {
.type = SD_OP_TYPE_CLUSTER,
.process_main = cluster_disable_recover,
},
+
[SD_OP_INFO_RECOVER] = {
.name = "INFO_RECOVER",
.type = SD_OP_TYPE_LOCAL,
.process_main = local_info_recover,
},
+
+ [SD_OP_CALC_CHKSUM_PEER] = {
+ .name = "CALC_CHKSUM_PEER",
+ .type = SD_OP_TYPE_PEER,
+ .process_work = peer_calc_obj_chksum,
+ },
+
};
struct sd_op_template *get_sd_op(uint8_t opcode)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 857cf87..9ef22f7 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -325,6 +325,7 @@ int peer_read_obj(struct request *req);
int peer_write_obj(struct request *req);
int peer_create_and_write_obj(struct request *req);
int peer_remove_obj(struct request *req);
+int peer_calc_obj_chksum(struct request *req);
/* object_cache */
--
1.7.11.4
More information about the sheepdog
mailing list