On Tue, Aug 21, 2012 at 1:42 PM, MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> wrote: > At Thu, 16 Aug 2012 22:38:21 +0800, > Yunkai Zhang wrote: >> >> After add '-F' flag, the help looks like: >> $ collie vdi check >> Usage: collie vdi check [-F] [-s snapshot] [-a address] [-p port] [-h] <vdiname> >> Options: >> -F, --force_repair force repair object's copies (dangerous) > > How about '-r, --repair'? Good for me. > >> fprintf(stderr, "Failed to read, %s\n", >> sd_strerror(rsp->result)); >> exit(EXIT_FAILURE); >> } >> - return buf; >> + >> + memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN); > > Please define a member name instead of using __pad. OK. > > >> } >> >> -static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf) >> +static int do_repair(uint64_t oid, struct node_id *src, struct node_id *dest) >> { >> struct sd_req hdr; >> struct sd_rsp *rsp = (struct sd_rsp *)&hdr; >> + unsigned rlen, wlen; >> + char host[128]; >> int fd, ret; >> - unsigned wlen = SD_DATA_OBJ_SIZE, rlen = 0; >> - char name[128]; >> >> - addr_to_str(name, sizeof(name), vnode->nid.addr, 0); >> - fd = connect_to(name, vnode->nid.port); >> + addr_to_str(host, sizeof(host), dest->addr, 0); >> + >> + fd = connect_to(host, dest->port); >> if (fd < 0) { >> - fprintf(stderr, "failed to connect to %s:%"PRIu32"\n", >> - name, vnode->nid.port); >> - exit(EXIT_FAILURE); >> + fprintf(stderr, "Failed to connect\n"); >> + return SD_RES_EIO; >> } >> >> - sd_init_req(&hdr, SD_OP_WRITE_PEER); >> - hdr.epoch = sd_epoch; >> - hdr.flags = SD_FLAG_CMD_WRITE; >> - hdr.data_length = wlen; >> + sd_init_req(&hdr, SD_OP_REPAIR_OBJ_PEER); > > I don't think sending peer requests directly from outside sheeps is a > good idea. How about making the gateway node forward the requests? Ok, no problem. > >> >> + rlen = 0; >> + wlen = sizeof(*src); >> + >> + hdr.epoch = sd_epoch; >> hdr.obj.oid = oid; >> + hdr.data_length = wlen; >> + hdr.flags = SD_FLAG_CMD_WRITE; >> >> - ret = exec_req(fd, &hdr, buf, &wlen, &rlen); >> + ret = exec_req(fd, &hdr, src, &wlen, &rlen); >> close(fd); >> - >> if (ret) { >> - fprintf(stderr, "Failed to execute request\n"); >> - exit(EXIT_FAILURE); >> + fprintf(stderr, "Failed to repair oid:%"PRIx64"\n", oid); >> + return SD_RES_EIO; >> } >> - >> if (rsp->result != SD_RES_SUCCESS) { >> - fprintf(stderr, "Failed to read, %s\n", >> - sd_strerror(rsp->result)); >> - exit(EXIT_FAILURE); >> + fprintf(stderr, "Failed to repair oid:%"PRIx64", %s\n", >> + oid, sd_strerror(rsp->result)); >> + return rsp->result; >> } >> + >> + return SD_RES_SUCCESS; >> } >> >> -/* >> - * Fix consistency of the replica of oid. >> - * >> - * XXX: The fix is rather dumb, just read the first copy and write it >> - * to other replica. >> - */ >> -static void do_check_repair(uint64_t oid, int nr_copies) >> +static int do_check_repair(uint64_t oid, int nr_copies) >> { >> struct sd_vnode *tgt_vnodes[nr_copies]; >> - void *buf, *buf_cmp; >> - int i; >> + unsigned char sha1[SD_MAX_COPIES][SHA1_LEN]; >> + char host[128]; >> + int i, j; >> >> oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes); >> - buf = read_object_from(tgt_vnodes[0], oid); >> - for (i = 1; i < nr_copies; i++) { >> - buf_cmp = read_object_from(tgt_vnodes[i], oid); >> - if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) { >> - free(buf_cmp); >> - goto fix_consistency; >> + for (i = 0; i < nr_copies; i++) { >> + get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]); >> + } >> + >> + for (i = 0; i < nr_copies; i++) { >> + for (j = (i + 1); j < nr_copies; j++) { >> + if (memcmp(sha1[i], sha1[j], SHA1_LEN)) >> + goto diff; >> } >> - free(buf_cmp); >> } >> - free(buf); >> - return; >> + return 0; >> >> -fix_consistency: >> - for (i = 1; i < nr_copies; i++) >> - write_object_to(tgt_vnodes[i], oid, buf); >> - fprintf(stdout, "fix %"PRIx64" success\n", oid); >> - free(buf); >> +diff: >> + fprintf(stderr, "Failed oid: %"PRIx64"\n", oid); >> + for (i = 0; i < nr_copies; i++) { >> + addr_to_str(host, sizeof(host), tgt_vnodes[i]->nid.addr, 0); >> + fprintf(stderr, ">> copy[%d], sha1: %s, from: %s:%d\n", >> + i, sha1_to_hex(sha1[i]), host, tgt_vnodes[i]->nid.port); >> + } >> + >> + if (!vdi_cmd_data.force_repair) >> + return -1; >> + >> + /* >> + * Force repair the consistency of oid's replica >> + * >> + * FIXME: this fix is rather dumb, it just read the >> + * first copy and write it to other replica, >> + */ >> + fprintf(stderr, ">> force repairing ...\n"); >> + addr_to_str(host, sizeof(host), tgt_vnodes[0]->nid.addr, >> + tgt_vnodes[0]->nid.port); >> + for (i = 1; i< nr_copies; i++) { > > Needs a space before '<'. > >> @@ -1466,18 +1474,22 @@ static int vdi_check(int argc, char **argv) >> goto out; >> } >> >> + printf("CHECKING VDI:%s ...\n", vdiname); >> ret = check_repair_vdi(vid); >> - if (ret != EXIT_SUCCESS) >> + if (ret != EXIT_SUCCESS) { >> + printf("%s\n", >> + vdi_cmd_data.force_repair?"FORCE REPAIRED":"FAILED"); > > Need spaces around '?' and ':'. Ok, I forgot to check it. I will give V2 later. > > Thanks, > > Kazutaka -- Yunkai Zhang Work at Taobao |