[sheepdog] [PATCH 1/2] collie: optimize 'collie vdi check' command
Yunkai Zhang
yunkai.me at gmail.com
Tue Aug 21 10:00:51 CEST 2012
On Tue, Aug 21, 2012 at 2:26 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote:
> On Tue, Aug 21, 2012 at 1:48 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote:
>> On Tue, Aug 21, 2012 at 1:42 PM, MORITA Kazutaka
>> <morita.kazutaka at lab.ntt.co.jp> wrote:
>>> At Thu, 16 Aug 2012 22:38:21 +0800,
>>> Yunkai Zhang wrote:
>>>>
>>>> After add '-F' flag, the help looks like:
>>>> $ collie vdi check
>>>> Usage: collie vdi check [-F] [-s snapshot] [-a address] [-p port] [-h] <vdiname>
>>>> Options:
>>>> -F, --force_repair force repair object's copies (dangerous)
>>>
>>> How about '-r, --repair'?
>>
>>
>> Good for me.
>
> When I submit this patch, my new patch "collie: add self options to
> collie's command" haven't been developed, so '-r, --repair' options
> would be conflict with '-r, --raw' option.
I found that '-r, --raw' option is also need by 'collie vdi list'
command, so there exist name conflict.
I would use '-R, --repair'.
>
> Since these two patches are dependent, I'll put them in a series in V2.
>
>>
>>
>>>
>>>> fprintf(stderr, "Failed to read, %s\n",
>>>> sd_strerror(rsp->result));
>>>> exit(EXIT_FAILURE);
>>>> }
>>>> - return buf;
>>>> +
>>>> + memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN);
>>>
>>> Please define a member name instead of using __pad.
>>
>> OK.
>>
>>>
>>>
>>>> }
>>>>
>>>> -static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
>>>> +static int do_repair(uint64_t oid, struct node_id *src, struct node_id *dest)
>>>> {
>>>> struct sd_req hdr;
>>>> struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>>>> + unsigned rlen, wlen;
>>>> + char host[128];
>>>> int fd, ret;
>>>> - unsigned wlen = SD_DATA_OBJ_SIZE, rlen = 0;
>>>> - char name[128];
>>>>
>>>> - addr_to_str(name, sizeof(name), vnode->nid.addr, 0);
>>>> - fd = connect_to(name, vnode->nid.port);
>>>> + addr_to_str(host, sizeof(host), dest->addr, 0);
>>>> +
>>>> + fd = connect_to(host, dest->port);
>>>> if (fd < 0) {
>>>> - fprintf(stderr, "failed to connect to %s:%"PRIu32"\n",
>>>> - name, vnode->nid.port);
>>>> - exit(EXIT_FAILURE);
>>>> + fprintf(stderr, "Failed to connect\n");
>>>> + return SD_RES_EIO;
>>>> }
>>>>
>>>> - sd_init_req(&hdr, SD_OP_WRITE_PEER);
>>>> - hdr.epoch = sd_epoch;
>>>> - hdr.flags = SD_FLAG_CMD_WRITE;
>>>> - hdr.data_length = wlen;
>>>> + sd_init_req(&hdr, SD_OP_REPAIR_OBJ_PEER);
>>>
>>> I don't think sending peer requests directly from outside sheeps is a
>>> good idea. How about making the gateway node forward the requests?
>>
>> Ok, no problem.
>>
>>
>>>
>>>>
>>>> + rlen = 0;
>>>> + wlen = sizeof(*src);
>>>> +
>>>> + hdr.epoch = sd_epoch;
>>>> hdr.obj.oid = oid;
>>>> + hdr.data_length = wlen;
>>>> + hdr.flags = SD_FLAG_CMD_WRITE;
>>>>
>>>> - ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
>>>> + ret = exec_req(fd, &hdr, src, &wlen, &rlen);
>>>> close(fd);
>>>> -
>>>> if (ret) {
>>>> - fprintf(stderr, "Failed to execute request\n");
>>>> - exit(EXIT_FAILURE);
>>>> + fprintf(stderr, "Failed to repair oid:%"PRIx64"\n", oid);
>>>> + return SD_RES_EIO;
>>>> }
>>>> -
>>>> if (rsp->result != SD_RES_SUCCESS) {
>>>> - fprintf(stderr, "Failed to read, %s\n",
>>>> - sd_strerror(rsp->result));
>>>> - exit(EXIT_FAILURE);
>>>> + fprintf(stderr, "Failed to repair oid:%"PRIx64", %s\n",
>>>> + oid, sd_strerror(rsp->result));
>>>> + return rsp->result;
>>>> }
>>>> +
>>>> + return SD_RES_SUCCESS;
>>>> }
>>>>
>>>> -/*
>>>> - * Fix consistency of the replica of oid.
>>>> - *
>>>> - * XXX: The fix is rather dumb, just read the first copy and write it
>>>> - * to other replica.
>>>> - */
>>>> -static void do_check_repair(uint64_t oid, int nr_copies)
>>>> +static int do_check_repair(uint64_t oid, int nr_copies)
>>>> {
>>>> struct sd_vnode *tgt_vnodes[nr_copies];
>>>> - void *buf, *buf_cmp;
>>>> - int i;
>>>> + unsigned char sha1[SD_MAX_COPIES][SHA1_LEN];
>>>> + char host[128];
>>>> + int i, j;
>>>>
>>>> oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes);
>>>> - buf = read_object_from(tgt_vnodes[0], oid);
>>>> - for (i = 1; i < nr_copies; i++) {
>>>> - buf_cmp = read_object_from(tgt_vnodes[i], oid);
>>>> - if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) {
>>>> - free(buf_cmp);
>>>> - goto fix_consistency;
>>>> + for (i = 0; i < nr_copies; i++) {
>>>> + get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]);
>>>> + }
>>>> +
>>>> + for (i = 0; i < nr_copies; i++) {
>>>> + for (j = (i + 1); j < nr_copies; j++) {
>>>> + if (memcmp(sha1[i], sha1[j], SHA1_LEN))
>>>> + goto diff;
>>>> }
>>>> - free(buf_cmp);
>>>> }
>>>> - free(buf);
>>>> - return;
>>>> + return 0;
>>>>
>>>> -fix_consistency:
>>>> - for (i = 1; i < nr_copies; i++)
>>>> - write_object_to(tgt_vnodes[i], oid, buf);
>>>> - fprintf(stdout, "fix %"PRIx64" success\n", oid);
>>>> - free(buf);
>>>> +diff:
>>>> + fprintf(stderr, "Failed oid: %"PRIx64"\n", oid);
>>>> + for (i = 0; i < nr_copies; i++) {
>>>> + addr_to_str(host, sizeof(host), tgt_vnodes[i]->nid.addr, 0);
>>>> + fprintf(stderr, ">> copy[%d], sha1: %s, from: %s:%d\n",
>>>> + i, sha1_to_hex(sha1[i]), host, tgt_vnodes[i]->nid.port);
>>>> + }
>>>> +
>>>> + if (!vdi_cmd_data.force_repair)
>>>> + return -1;
>>>> +
>>>> + /*
>>>> + * Force repair the consistency of oid's replica
>>>> + *
>>>> + * FIXME: this fix is rather dumb, it just read the
>>>> + * first copy and write it to other replica,
>>>> + */
>>>> + fprintf(stderr, ">> force repairing ...\n");
>>>> + addr_to_str(host, sizeof(host), tgt_vnodes[0]->nid.addr,
>>>> + tgt_vnodes[0]->nid.port);
>>>> + for (i = 1; i< nr_copies; i++) {
>>>
>>> Needs a space before '<'.
>>>
>>>> @@ -1466,18 +1474,22 @@ static int vdi_check(int argc, char **argv)
>>>> goto out;
>>>> }
>>>>
>>>> + printf("CHECKING VDI:%s ...\n", vdiname);
>>>> ret = check_repair_vdi(vid);
>>>> - if (ret != EXIT_SUCCESS)
>>>> + if (ret != EXIT_SUCCESS) {
>>>> + printf("%s\n",
>>>> + vdi_cmd_data.force_repair?"FORCE REPAIRED":"FAILED");
>>>
>>> Need spaces around '?' and ':'.
>>
>> Ok, I forgot to check it.
>>
>> I will give V2 later.
>>
>>>
>>> Thanks,
>>>
>>> Kazutaka
>>
>>
>>
>> --
>> Yunkai Zhang
>> Work at Taobao
>
>
>
> --
> Yunkai Zhang
> Work at Taobao
--
Yunkai Zhang
Work at Taobao
More information about the sheepdog
mailing list