[sheepdog] [PATCH 1/2] collie: optimize 'collie vdi check' command

Yunkai Zhang yunkai.me at gmail.com
Tue Aug 21 08:26:19 CEST 2012


On Tue, Aug 21, 2012 at 1:48 PM, Yunkai Zhang <yunkai.me at gmail.com> wrote:
> On Tue, Aug 21, 2012 at 1:42 PM, MORITA Kazutaka
> <morita.kazutaka at lab.ntt.co.jp> wrote:
>> At Thu, 16 Aug 2012 22:38:21 +0800,
>> Yunkai Zhang wrote:
>>>
>>> After add '-F' flag, the help looks like:
>>> $ collie vdi check
>>> Usage: collie vdi check [-F] [-s snapshot] [-a address] [-p port] [-h] <vdiname>
>>> Options:
>>>   -F, --force_repair      force repair object's copies (dangerous)
>>
>> How about '-r, --repair'?
>
>
> Good for me.

When I submit this patch, my new patch "collie: add self options to
collie's command" haven't been developed, so '-r, --repair' options
would be conflict with '-r, --raw' option.

Since these two patches are dependent, I'll put them in a series in V2.

>
>
>>
>>>               fprintf(stderr, "Failed to read, %s\n",
>>>                       sd_strerror(rsp->result));
>>>               exit(EXIT_FAILURE);
>>>       }
>>> -     return buf;
>>> +
>>> +     memcpy(sha1, (unsigned char *)&rsp->__pad[0], SHA1_LEN);
>>
>> Please define a member name instead of using __pad.
>
> OK.
>
>>
>>
>>>  }
>>>
>>> -static void write_object_to(struct sd_vnode *vnode, uint64_t oid, void *buf)
>>> +static int do_repair(uint64_t oid, struct node_id *src, struct node_id *dest)
>>>  {
>>>       struct sd_req hdr;
>>>       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>>> +     unsigned rlen, wlen;
>>> +     char host[128];
>>>       int fd, ret;
>>> -     unsigned wlen = SD_DATA_OBJ_SIZE, rlen = 0;
>>> -     char name[128];
>>>
>>> -     addr_to_str(name, sizeof(name), vnode->nid.addr, 0);
>>> -     fd = connect_to(name, vnode->nid.port);
>>> +     addr_to_str(host, sizeof(host), dest->addr, 0);
>>> +
>>> +     fd = connect_to(host, dest->port);
>>>       if (fd < 0) {
>>> -             fprintf(stderr, "failed to connect to %s:%"PRIu32"\n",
>>> -                     name, vnode->nid.port);
>>> -             exit(EXIT_FAILURE);
>>> +             fprintf(stderr, "Failed to connect\n");
>>> +             return SD_RES_EIO;
>>>       }
>>>
>>> -     sd_init_req(&hdr, SD_OP_WRITE_PEER);
>>> -     hdr.epoch = sd_epoch;
>>> -     hdr.flags = SD_FLAG_CMD_WRITE;
>>> -     hdr.data_length = wlen;
>>> +     sd_init_req(&hdr, SD_OP_REPAIR_OBJ_PEER);
>>
>> I don't think sending peer requests directly from outside sheeps is a
>> good idea.  How about making the gateway node forward the requests?
>
> Ok, no problem.
>
>
>>
>>>
>>> +     rlen = 0;
>>> +     wlen = sizeof(*src);
>>> +
>>> +     hdr.epoch = sd_epoch;
>>>       hdr.obj.oid = oid;
>>> +     hdr.data_length = wlen;
>>> +     hdr.flags = SD_FLAG_CMD_WRITE;
>>>
>>> -     ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
>>> +     ret = exec_req(fd, &hdr, src, &wlen, &rlen);
>>>       close(fd);
>>> -
>>>       if (ret) {
>>> -             fprintf(stderr, "Failed to execute request\n");
>>> -             exit(EXIT_FAILURE);
>>> +             fprintf(stderr, "Failed to repair oid:%"PRIx64"\n", oid);
>>> +             return SD_RES_EIO;
>>>       }
>>> -
>>>       if (rsp->result != SD_RES_SUCCESS) {
>>> -             fprintf(stderr, "Failed to read, %s\n",
>>> -                     sd_strerror(rsp->result));
>>> -             exit(EXIT_FAILURE);
>>> +             fprintf(stderr, "Failed to repair oid:%"PRIx64", %s\n",
>>> +                     oid, sd_strerror(rsp->result));
>>> +             return rsp->result;
>>>       }
>>> +
>>> +     return SD_RES_SUCCESS;
>>>  }
>>>
>>> -/*
>>> - * Fix consistency of the replica of oid.
>>> - *
>>> - * XXX: The fix is rather dumb, just read the first copy and write it
>>> - * to other replica.
>>> - */
>>> -static void do_check_repair(uint64_t oid, int nr_copies)
>>> +static int do_check_repair(uint64_t oid, int nr_copies)
>>>  {
>>>       struct sd_vnode *tgt_vnodes[nr_copies];
>>> -     void *buf, *buf_cmp;
>>> -     int i;
>>> +     unsigned char sha1[SD_MAX_COPIES][SHA1_LEN];
>>> +     char host[128];
>>> +     int i, j;
>>>
>>>       oid_to_vnodes(sd_vnodes, sd_vnodes_nr, oid, nr_copies, tgt_vnodes);
>>> -     buf = read_object_from(tgt_vnodes[0], oid);
>>> -     for (i = 1; i < nr_copies; i++) {
>>> -             buf_cmp = read_object_from(tgt_vnodes[i], oid);
>>> -             if (memcmp(buf, buf_cmp, SD_DATA_OBJ_SIZE)) {
>>> -                     free(buf_cmp);
>>> -                     goto fix_consistency;
>>> +     for (i = 0; i < nr_copies; i++) {
>>> +             get_obj_checksum_from(tgt_vnodes[i], oid, sha1[i]);
>>> +     }
>>> +
>>> +     for (i = 0; i < nr_copies; i++) {
>>> +             for (j = (i + 1); j < nr_copies; j++) {
>>> +                     if (memcmp(sha1[i], sha1[j], SHA1_LEN))
>>> +                             goto diff;
>>>               }
>>> -             free(buf_cmp);
>>>       }
>>> -     free(buf);
>>> -     return;
>>> +     return 0;
>>>
>>> -fix_consistency:
>>> -     for (i = 1; i < nr_copies; i++)
>>> -             write_object_to(tgt_vnodes[i], oid, buf);
>>> -     fprintf(stdout, "fix %"PRIx64" success\n", oid);
>>> -     free(buf);
>>> +diff:
>>> +     fprintf(stderr, "Failed oid: %"PRIx64"\n", oid);
>>> +     for (i = 0; i < nr_copies; i++) {
>>> +             addr_to_str(host, sizeof(host), tgt_vnodes[i]->nid.addr, 0);
>>> +             fprintf(stderr, ">> copy[%d], sha1: %s, from: %s:%d\n",
>>> +                     i, sha1_to_hex(sha1[i]), host, tgt_vnodes[i]->nid.port);
>>> +     }
>>> +
>>> +     if (!vdi_cmd_data.force_repair)
>>> +             return -1;
>>> +
>>> +     /*
>>> +      * Force repair the consistency of oid's replica
>>> +      *
>>> +      * FIXME: this fix is rather dumb, it just read the
>>> +      * first copy and write it to other replica,
>>> +      */
>>> +     fprintf(stderr, ">> force repairing ...\n");
>>> +     addr_to_str(host, sizeof(host), tgt_vnodes[0]->nid.addr,
>>> +                 tgt_vnodes[0]->nid.port);
>>> +     for (i = 1; i< nr_copies; i++) {
>>
>> Needs a space before '<'.
>>
>>> @@ -1466,18 +1474,22 @@ static int vdi_check(int argc, char **argv)
>>>               goto out;
>>>       }
>>>
>>> +     printf("CHECKING VDI:%s ...\n", vdiname);
>>>       ret = check_repair_vdi(vid);
>>> -     if (ret != EXIT_SUCCESS)
>>> +     if (ret != EXIT_SUCCESS) {
>>> +             printf("%s\n",
>>> +                    vdi_cmd_data.force_repair?"FORCE REPAIRED":"FAILED");
>>
>> Need spaces around '?' and ':'.
>
> Ok, I forgot to check it.
>
> I will give V2 later.
>
>>
>> Thanks,
>>
>> Kazutaka
>
>
>
> --
> Yunkai Zhang
> Work at Taobao



-- 
Yunkai Zhang
Work at Taobao



More information about the sheepdog mailing list