[sheepdog] [PATCH 1/2] collie: optimize 'collie vdi check' command

Yunkai Zhang yunkai.me at gmail.com
Tue Aug 21 10:00:51 CEST 2012


On Tue, Aug 21, 2012 at 2:26 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote:
> On Tue, Aug 21, 2012 at 1:48 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote:
>> On Tue, Aug 21, 2012 at 1:42 PM, MORITA Kazutaka
>> <morita.kazutaka at lab.ntt.co.jp> wrote:
>>> At Thu, 16 Aug 2012 22:38:21 +0800,
>>> Yunkai Zhang wrote:
>>>>
>>>> After add '-F' flag, the help looks like:
>>>> $ collie vdi check
>>>> Usage: collie vdi check [-F] [-s snapshot] [-a address] [-p port] [-h] <vdiname>
>>>> Options:
>>>>   -F, --force_repair      force repair object's copies (dangerous)
>>>
>>> How about '-r, --repair'?
>>
>>
>> Good for me.
>
> When I submit this patch, my new patch "collie: add self options to
> collie's command" haven't been developed, so '-r, --repair' options
> would be conflict with '-r, --raw' option.

I found that '-r, --raw' option is also need by 'collie vdi list'
command, so there exist name conflict.

I would use '-R, --repair'.


>
> Since these two patches are dependent, I'll put them in a series in V2.
>
>>
>>
>>>
>>>>               fprintf(stderr, "Failed to read, %s\n",
>>>>                       sd_strerror(rsp->result));
>>>>               exit(EXIT_FAILURE);
>>>>       }
>>>> -     return buf;
>>>> +
>>>> +     memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN);
>>>
>>> Please define a member name instead of using __pad.
>>
>> OK.
>>
>>>
>>>
>>>>  }
>>>>
>>>> -static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
>>>> +static int do_repair(uint64_t oid, struct node_id *src, struct node_id *dest)
>>>>  {
>>>>       struct sd_req hdr;
>>>>       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>>>> +     unsigned rlen, wlen;
>>>> +     char host[128];
>>>>       int fd, ret;
>>>> -     unsigned wlen = SD_DATA_OBJ_SIZE, rlen = 0;
>>>> -     char name[128];
>>>>
>>>> -     addr_to_str(name, sizeof(name), vnode->nid.addr, 0);
>>>> -     fd = connect_to(name, vnode->nid.port);
>>>> +     addr_to_str(host, sizeof(host), dest->addr, 0);
>>>> +
>>>> +     fd = connect_to(host, dest->port);
>>>>       if (fd < 0) {
>>>> -             fprintf(stderr, "failed to connect to %s:%"PRIu32"\n",
>>>> -                     name, vnode->nid.port);
>>>> -             exit(EXIT_FAILURE);
>>>> +             fprintf(stderr, "Failed to connect\n");
>>>> +             return SD_RES_EIO;
>>>>       }
>>>>
>>>> -     sd_init_req(&hdr, SD_OP_WRITE_PEER);
>>>> -     hdr.epoch = sd_epoch;
>>>> -     hdr.flags = SD_FLAG_CMD_WRITE;
>>>> -     hdr.data_length = wlen;
>>>> +     sd_init_req(&hdr, SD_OP_REPAIR_OBJ_PEER);
>>>
>>> I don't think sending peer requests directly from outside sheeps is a
>>> good idea.  How about making the gateway node forward the requests?
>>
>> Ok, no problem.
>>
>>
>>>
>>>>
>>>> +     rlen = 0;
>>>> +     wlen = sizeof(*src);
>>>> +
>>>> +     hdr.epoch = sd_epoch;
>>>>       hdr.obj.oid = oid;
>>>> +     hdr.data_length = wlen;
>>>> +     hdr.flags = SD_FLAG_CMD_WRITE;
>>>>
>>>> -     ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
>>>> +     ret = exec_req(fd, &hdr, src, &wlen, &rlen);
>>>>       close(fd);
>>>> -
>>>>       if (ret) {
>>>> -             fprintf(stderr, "Failed to execute request\n");
>>>> -             exit(EXIT_FAILURE);
>>>> +             fprintf(stderr, "Failed to repair oid:%"PRIx64"\n", oid);
>>>> +             return SD_RES_EIO;
>>>>       }
>>>> -
>>>>       if (rsp->result != SD_RES_SUCCESS) {
>>>> -             fprintf(stderr, "Failed to read, %s\n",
>>>> -                     sd_strerror(rsp->result));
>>>> -             exit(EXIT_FAILURE);
>>>> +             fprintf(stderr, "Failed to repair oid:%"PRIx64", %s\n",
>>>> +                     oid, sd_strerror(rsp->result));
>>>> +             return rsp->result;
>>>>       }
>>>> +
>>>> +     return SD_RES_SUCCESS;
>>>>  }
>>>>
>>>> -/*
>>>> - * Fix consistency of the replica of oid.
>>>> - *
>>>> - * XXX: The fix is rather dumb, just read the first copy and write it
>>>> - * to other replica.
>>>> - */
>>>> -static void do_check_repair(uint64_t oid, int nr_copies)
>>>> +static int do_check_repair(uint64_t oid, int nr_copies)
>>>>  {
>>>>       struct sd_vnode *tgt_vnodes[nr_copies];
>>>> -     void *buf, *buf_cmp;
>>>> -     int i;
>>>> +     unsigned char sha1[SD_MAX_COPIES][SHA1_LEN];
>>>> +     char host[128];
>>>> +     int i, j;
>>>>
>>>>       oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes);
>>>> -     buf = read_object_from(tgt_vnodes[0], oid);
>>>> -     for (i = 1; i < nr_copies; i++) {
>>>> -             buf_cmp = read_object_from(tgt_vnodes[i], oid);
>>>> -             if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) {
>>>> -                     free(buf_cmp);
>>>> -                     goto fix_consistency;
>>>> +     for (i = 0; i < nr_copies; i++) {
>>>> +             get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]);
>>>> +     }
>>>> +
>>>> +     for (i = 0; i < nr_copies; i++) {
>>>> +             for (j = (i + 1); j < nr_copies; j++) {
>>>> +                     if (memcmp(sha1[i], sha1[j], SHA1_LEN))
>>>> +                             goto diff;
>>>>               }
>>>> -             free(buf_cmp);
>>>>       }
>>>> -     free(buf);
>>>> -     return;
>>>> +     return 0;
>>>>
>>>> -fix_consistency:
>>>> -     for (i = 1; i < nr_copies; i++)
>>>> -             write_object_to(tgt_vnodes[i], oid, buf);
>>>> -     fprintf(stdout, "fix %"PRIx64" success\n", oid);
>>>> -     free(buf);
>>>> +diff:
>>>> +     fprintf(stderr, "Failed oid: %"PRIx64"\n", oid);
>>>> +     for (i = 0; i < nr_copies; i++) {
>>>> +             addr_to_str(host, sizeof(host), tgt_vnodes[i]->nid.addr, 0);
>>>> +             fprintf(stderr, ">> copy[%d], sha1: %s, from: %s:%d\n",
>>>> +                     i, sha1_to_hex(sha1[i]), host, tgt_vnodes[i]->nid.port);
>>>> +     }
>>>> +
>>>> +     if (!vdi_cmd_data.force_repair)
>>>> +             return -1;
>>>> +
>>>> +     /*
>>>> +      * Force repair the consistency of oid's replica
>>>> +      *
>>>> +      * FIXME: this fix is rather dumb, it just read the
>>>> +      * first copy and write it to other replica,
>>>> +      */
>>>> +     fprintf(stderr, ">> force repairing ...\n");
>>>> +     addr_to_str(host, sizeof(host), tgt_vnodes[0]->nid.addr,
>>>> +                 tgt_vnodes[0]->nid.port);
>>>> +     for (i = 1; i< nr_copies; i++) {
>>>
>>> Needs a space before '<'.
>>>
>>>> @@ -1466,18 +1474,22 @@ static int vdi_check(int argc, char **argv)
>>>>               goto out;
>>>>       }
>>>>
>>>> +     printf("CHECKING VDI:%s ...\n", vdiname);
>>>>       ret = check_repair_vdi(vid);
>>>> -     if (ret != EXIT_SUCCESS)
>>>> +     if (ret != EXIT_SUCCESS) {
>>>> +             printf("%s\n",
>>>> +                    vdi_cmd_data.force_repair?"FORCE REPAIRED":"FAILED");
>>>
>>> Need spaces around '?' and ':'.
>>
>> Ok, I forgot to check it.
>>
>> I will give V2 later.
>>
>>>
>>> Thanks,
>>>
>>> Kazutaka
>>
>>
>>
>> --
>> Yunkai Zhang
>> Work at Taobao
>
>
>
> --
> Yunkai Zhang
> Work at Taobao



-- 
Yunkai Zhang
Work at Taobao



More information about the sheepdog mailing list