At Thu, 16 Aug 2012 22:38:21 +0800, Yunkai Zhang wrote: > > After add '-F' flag, the help looks like: > $ collie vdi check > Usage: collie vdi check [-F] [-s snapshot] [-a address] [-p port] [-h] <vdiname> > Options: > -F, --force_repair force repair object's copies (dangerous) How about '-r, --repair'? > fprintf(stderr, "Failed to read, %s\n", > sd_strerror(rsp->result)); > exit(EXIT_FAILURE); > } > - return buf; > + > + memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN); Please define a member name instead of using __pad. > } > > -static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf) > +static int do_repair(uint64_t oid, struct node_id *src, struct node_id *dest) > { > struct sd_req hdr; > struct sd_rsp *rsp = (struct sd_rsp *)&hdr; > + unsigned rlen, wlen; > + char host[128]; > int fd, ret; > - unsigned wlen = SD_DATA_OBJ_SIZE, rlen = 0; > - char name[128]; > > - addr_to_str(name, sizeof(name), vnode->nid.addr, 0); > - fd = connect_to(name, vnode->nid.port); > + addr_to_str(host, sizeof(host), dest->addr, 0); > + > + fd = connect_to(host, dest->port); > if (fd < 0) { > - fprintf(stderr, "failed to connect to %s:%"PRIu32"\n", > - name, vnode->nid.port); > - exit(EXIT_FAILURE); > + fprintf(stderr, "Failed to connect\n"); > + return SD_RES_EIO; > } > > - sd_init_req(&hdr, SD_OP_WRITE_PEER); > - hdr.epoch = sd_epoch; > - hdr.flags = SD_FLAG_CMD_WRITE; > - hdr.data_length = wlen; > + sd_init_req(&hdr, SD_OP_REPAIR_OBJ_PEER); I don't think sending peer requests directly from outside sheeps is a good idea. How about making the gateway node forward the requests? > > + rlen = 0; > + wlen = sizeof(*src); > + > + hdr.epoch = sd_epoch; > hdr.obj.oid = oid; > + hdr.data_length = wlen; > + hdr.flags = SD_FLAG_CMD_WRITE; > > - ret = exec_req(fd, &hdr, buf, &wlen, &rlen); > + ret = exec_req(fd, &hdr, src, &wlen, &rlen); > close(fd); > - > if (ret) { > - fprintf(stderr, "Failed to execute request\n"); > - exit(EXIT_FAILURE); > + fprintf(stderr, "Failed to repair oid:%"PRIx64"\n", oid); > + return SD_RES_EIO; > } > - > if (rsp->result != SD_RES_SUCCESS) { > - fprintf(stderr, "Failed to read, %s\n", > - sd_strerror(rsp->result)); > - exit(EXIT_FAILURE); > + fprintf(stderr, "Failed to repair oid:%"PRIx64", %s\n", > + oid, sd_strerror(rsp->result)); > + return rsp->result; > } > + > + return SD_RES_SUCCESS; > } > > -/* > - * Fix consistency of the replica of oid. > - * > - * XXX: The fix is rather dumb, just read the first copy and write it > - * to other replica. > - */ > -static void do_check_repair(uint64_t oid, int nr_copies) > +static int do_check_repair(uint64_t oid, int nr_copies) > { > struct sd_vnode *tgt_vnodes[nr_copies]; > - void *buf, *buf_cmp; > - int i; > + unsigned char sha1[SD_MAX_COPIES][SHA1_LEN]; > + char host[128]; > + int i, j; > > oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes); > - buf = read_object_from(tgt_vnodes[0], oid); > - for (i = 1; i < nr_copies; i++) { > - buf_cmp = read_object_from(tgt_vnodes[i], oid); > - if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) { > - free(buf_cmp); > - goto fix_consistency; > + for (i = 0; i < nr_copies; i++) { > + get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]); > + } > + > + for (i = 0; i < nr_copies; i++) { > + for (j = (i + 1); j < nr_copies; j++) { > + if (memcmp(sha1[i], sha1[j], SHA1_LEN)) > + goto diff; > } > - free(buf_cmp); > } > - free(buf); > - return; > + return 0; > > -fix_consistency: > - for (i = 1; i < nr_copies; i++) > - write_object_to(tgt_vnodes[i], oid, buf); > - fprintf(stdout, "fix %"PRIx64" success\n", oid); > - free(buf); > +diff: > + fprintf(stderr, "Failed oid: %"PRIx64"\n", oid); > + for (i = 0; i < nr_copies; i++) { > + addr_to_str(host, sizeof(host), tgt_vnodes[i]->nid.addr, 0); > + fprintf(stderr, ">> copy[%d], sha1: %s, from: %s:%d\n", > + i, sha1_to_hex(sha1[i]), host, tgt_vnodes[i]->nid.port); > + } > + > + if (!vdi_cmd_data.force_repair) > + return -1; > + > + /* > + * Force repair the consistency of oid's replica > + * > + * FIXME: this fix is rather dumb, it just read the > + * first copy and write it to other replica, > + */ > + fprintf(stderr, ">> force repairing ...\n"); > + addr_to_str(host, sizeof(host), tgt_vnodes[0]->nid.addr, > + tgt_vnodes[0]->nid.port); > + for (i = 1; i< nr_copies; i++) { Needs a space before '<'. > @@ -1466,18 +1474,22 @@ static int vdi_check(int argc, char **argv) > goto out; > } > > + printf("CHECKING VDI:%s ...\n", vdiname); > ret = check_repair_vdi(vid); > - if (ret != EXIT_SUCCESS) > + if (ret != EXIT_SUCCESS) { > + printf("%s\n", > + vdi_cmd_data.force_repair?"FORCE REPAIRED":"FAILED"); Need spaces around '?' and ':'. Thanks, Kazutaka |