[sheepdog] [PATCH v3] add vdi check -e to quickly check which objects are lost.

Hitoshi Mitake mitake.hitoshi at gmail.com
Sun Jul 27 10:36:56 CEST 2014


On Tue, Jul 1, 2014 at 10:51 AM, Ruoyu <liangry at ucweb.com> wrote:
> How about this patch regardless of the opcode?
>
> The SD_OP_OIDS_EXIST request requires a new kind of operation that both
> reads and writes data. Is there a better way to achieve this?

This patch (including the new opcode) looks good to me. Could you
rebase it on the latest master?

Thanks,
Hitoshi

>
>
> On 2014年05月28日 17:48, Ruoyu wrote:
>>
>> Sometimes we want to quickly check whether some of the vdi objects
>> or data objects are lost due to an unexpected issue.
>>
>> Although vdi check will do, it takes a lot of time because of
>> too much client-server communication. And the probability of
>> triggering automatic data fixing is quite low since the writing process
>> is strongly consistent.
>>
>> Therefore, the new option -e (--exist) checks whether all the objects
>> related to the vdi exist or not. It is fast because it submits
>> the batched object ids only once per node. I think this is enough
>> for this situation.
>>
>> Usage: dog vdi check -e <vdiname>
>>
>> Example:
>> $ dog vdi check -e test
>> test is fine, no object is missing.
>>
>> $ dog vdi check -e ucweb
>> [127.0.0.1:7001] oid 80b8071d00000000 is missing.
>> [127.0.0.1:7001] oid 00b8071d000000ee is missing.
>> ucweb lost 2 object(s).
>>
>> Signed-off-by: Ruoyu <liangry at ucweb.com>
>> ---
>>   dog/vdi.c                | 114 ++++++++++++++++++++++++++++++++++++++++++++++-
>>   include/internal_proto.h |   9 ++++
>>   include/sheep.h          |   6 +++
>>   include/sheepdog_proto.h |   1 +
>>   lib/net.c                |   2 +-
>>   sheep/ops.c              |  31 +++++++++++++
>>   6 files changed, 160 insertions(+), 3 deletions(-)
>>
>> diff --git a/dog/vdi.c b/dog/vdi.c
>> index 866cb36..45fe6a9 100644
>> --- a/dog/vdi.c
>> +++ b/dog/vdi.c
>> @@ -21,6 +21,8 @@
>>   #include "sha1.h"
>>   #include "fec.h"
>>   +struct rb_root oid_tree = RB_ROOT;
>> +
>>   static struct sd_option vdi_options[] = {
>>         {'P', "prealloc", false, "preallocate all the data objects"},
>>         {'n', "no-share", false, "share nothing with its parent"},
>> @@ -34,6 +36,7 @@ static struct sd_option vdi_options[] = {
>>         {'f', "force", false, "do operation forcibly"},
>>         {'y', "hyper", false, "create a hyper volume"},
>>         {'o', "oid", true, "specify the object id of the tracking
>> object"},
>> +       {'e', "exist", false, "check objects exist or not only, no
>> repairing"},
>>         { 0, NULL, false, NULL },
>>   };
>>   @@ -53,6 +56,7 @@ static struct vdi_cmd_data {
>>         uint8_t store_policy;
>>         uint64_t oid;
>>         bool no_share;
>> +       bool exist;
>>   } vdi_cmd_data = { ~0, };
>>     struct get_vdi_info {
>> @@ -875,6 +879,106 @@ out:
>>         return ret;
>>   }
>>   +#define OIDS_INIT_LENGTH 1024
>> +
>> +static void save_oid(uint64_t oid, int copies)
>> +{
>> +       const struct sd_vnode *vnodes[SD_MAX_COPIES];
>> +       struct oid_entry *entry;
>> +
>> +       oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
>> +       for (int i = 0; i < copies; i++) {
>> +               struct oid_entry key = {
>> +                       .node = (struct sd_node *) vnodes[i]->node
>> +               };
>> +               entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp);
>> +               if (!entry)
>> +                       panic("rb_search() failure.");
>> +
>> +               if (entry->last >= entry->end) {
>> +                       entry->end *= 2;
>> +                       entry->oids = xrealloc(entry->oids,
>> +                                       sizeof(uint64_t) * entry->end);
>> +               }
>> +               entry->oids[entry->last] = oid;
>> +               entry->last++;
>> +       }
>> +}
>> +
>> +static void build_oid_tree(const struct sd_inode *inode)
>> +{
>> +       uint32_t max_idx, vid;
>> +       uint64_t oid;
>> +       struct sd_node *node;
>> +       struct oid_entry *entry;
>> +       int copies = min((int)inode->nr_copies, sd_zones_nr);
>> +
>> +       rb_for_each_entry(node, &sd_nroot, rb) {
>> +               entry = xmalloc(sizeof(*entry));
>> +               entry->node = node;
>> +               entry->oids = xmalloc(sizeof(uint64_t) *
>> OIDS_INIT_LENGTH);
>> +               entry->end  = OIDS_INIT_LENGTH;
>> +               entry->last = 0;
>> +               rb_insert(&oid_tree, entry, rb, oid_entry_cmp);
>> +       }
>> +
>> +       save_oid(vid_to_vdi_oid(inode->vdi_id), copies);
>> +       max_idx = count_data_objs(inode);
>> +       for (uint32_t idx = 0; idx < max_idx; idx++) {
>> +               vid = sd_inode_get_vid(inode, idx);
>> +               if (vid == 0)
>> +                       continue;
>> +               oid = vid_to_data_oid(vid, idx);
>> +               save_oid(oid, copies);
>> +       }
>> +}
>> +
>> +static void destroy_oid_tree(void)
>> +{
>> +       struct oid_entry *entry;
>> +
>> +       rb_for_each_entry(entry, &oid_tree, rb)
>> +               free(entry->oids);
>> +       rb_destroy(&oid_tree, struct oid_entry, rb);
>> +}
>> +
>> +static int do_obj_check(const struct sd_inode *inode)
>> +{
>> +       int total = 0;
>> +       struct oid_entry *entry;
>> +       struct sd_req hdr;
>> +       struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>> +
>> +       build_oid_tree(inode);
>> +
>> +       rb_for_each_entry(entry, &oid_tree, rb) {
>> +               sd_init_req(&hdr, SD_OP_OIDS_EXIST);
>> +               hdr.data_length = sizeof(uint64_t) * entry->last;
>> +               hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ;
>> +               int ret = dog_exec_req(&entry->node->nid, &hdr,
>> entry->oids);
>> +               if (ret < 0)
>> +                       panic("dog_exec_req() failure.");
>> +
>> +               int n = rsp->data_length / sizeof(uint64_t);
>> +               total += n;
>> +               for (int i = 0; i < n; i++)
>> +                       printf("[%s] oid %016"PRIx64" is missing.\n",
>> +                                       addr_to_str(entry->node->nid.addr,
>> +
>> entry->node->nid.port),
>> +                                       entry->oids[i]);
>> +       }
>> +
>> +       destroy_oid_tree();
>> +
>> +       if (total == 0) {
>> +               printf("%s is fine, no object is missing.\n",
>> inode->name);
>> +               return EXIT_SUCCESS;
>> +       } else {
>> +               printf("%s lost %d object(s).\n", inode->name, total);
>> +               return EXIT_FAILURE;
>> +       }
>> +}
>> +
>>   static int do_track_object(uint64_t oid, uint8_t nr_copies)
>>   {
>>         int i, j, ret;
>> @@ -1771,7 +1875,10 @@ static int vdi_check(int argc, char **argv)
>>                 goto out;
>>         }
>>   -     ret = do_vdi_check(inode);
>> +       if (vdi_cmd_data.exist)
>> +               ret = do_obj_check(inode);
>> +       else
>> +               ret = do_vdi_check(inode);
>>   out:
>>         free(inode);
>>         return ret;
>> @@ -2359,7 +2466,7 @@ static int vdi_cache(int argc, char **argv)
>>   }
>>     static struct subcommand vdi_cmd[] = {
>> -       {"check", "<vdiname>", "saph", "check and repair image's
>> consistency",
>> +       {"check", "<vdiname>", "seaph", "check and repair image's
>> consistency",
>>          NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
>>          vdi_check, vdi_options},
>>         {"create", "<vdiname> <size>", "Pycaphrv", "create an image",
>> @@ -2491,6 +2598,9 @@ static int vdi_parser(int ch, const char *opt)
>>                         exit(EXIT_FAILURE);
>>                 }
>>                 break;
>> +       case 'e':
>> +               vdi_cmd_data.exist = true;
>> +               break;
>>         }
>>         return 0;
>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>> index ada084f..9d3928e 100644
>> --- a/include/internal_proto.h
>> +++ b/include/internal_proto.h
>> @@ -100,6 +100,7 @@
>>   #define SD_OP_NFS_DELETE      0xBC
>>   #define SD_OP_EXIST   0xBD
>>   #define SD_OP_CLUSTER_INFO    0xBE
>> +#define SD_OP_OIDS_EXIST       0xBF
>>   #define SD_OP_ALTER_CLUSTER_COPY      0xC0
>>   #define SD_OP_ALTER_VDI_COPY  0xC1
>>   @@ -175,6 +176,14 @@ struct sd_node {
>>   #endif
>>   };
>>   +struct oid_entry {
>> +       struct rb_node rb;
>> +       struct sd_node *node; /* key */
>> +       uint64_t *oids;       /* object id array */
>> +       int end;              /* idx to the end of the allocated oid array
>> */
>> +       int last;             /* idx to the last element of the oid array
>> */
>> +};
>> +
>>   /*
>>    * A joining sheep multicasts the local cluster info.  Then, the
>> existing nodes
>>    * reply the latest cluster info which is unique among all of the nodes.
>> diff --git a/include/sheep.h b/include/sheep.h
>> index 785883e..ef8958c 100644
>> --- a/include/sheep.h
>> +++ b/include/sheep.h
>> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node
>> *node1,
>>         return node_id_cmp(&node1->nid, &node2->nid);
>>   }
>>   +static inline int oid_entry_cmp(const struct oid_entry *entry1,
>> +                          const struct oid_entry *entry2)
>> +{
>> +       return node_cmp(entry1->node, entry2->node);
>> +}
>> +
>>   static inline bool node_eq(const struct sd_node *a, const struct sd_node
>> *b)
>>   {
>>         return node_cmp(a, b) == 0;
>> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
>> index 9361bad..c6e21b4 100644
>> --- a/include/sheepdog_proto.h
>> +++ b/include/sheepdog_proto.h
>> @@ -45,6 +45,7 @@
>>   #define SD_FLAG_CMD_COW      0x02
>>   #define SD_FLAG_CMD_CACHE    0x04
>>   #define SD_FLAG_CMD_DIRECT   0x08 /* don't use object cache */
>> +#define SD_FLAG_CMD_READ     0x10
>>   /* flags above 0x80 are sheepdog-internal */
>>     #define SD_RES_SUCCESS       0x00 /* Success */
>> diff --git a/lib/net.c b/lib/net.c
>> index b32e022..c2d86cb 100644
>> --- a/lib/net.c
>> +++ b/lib/net.c
>> @@ -333,7 +333,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void
>> *data,
>>         if (hdr->flags & SD_FLAG_CMD_WRITE) {
>>                 wlen = hdr->data_length;
>> -               rlen = 0;
>> +               rlen = (hdr->flags & SD_FLAG_CMD_READ) ? hdr->data_length
>> : 0;
>>         } else {
>>                 wlen = 0;
>>                 rlen = hdr->data_length;
>> diff --git a/sheep/ops.c b/sheep/ops.c
>> index 61eb37f..c54017e 100644
>> --- a/sheep/ops.c
>> +++ b/sheep/ops.c
>> @@ -1058,6 +1058,30 @@ static int local_oid_exist(struct request *req)
>>         return SD_RES_NO_OBJ;
>>   }
>>   +static int local_oids_exist(const struct sd_req *req, struct sd_rsp
>> *rsp,
>> +                             void *data)
>> +{
>> +       struct request *r = container_of(req, struct request, rq);
>> +       uint64_t *oids = (uint64_t *) data;
>> +       uint8_t ec_index;
>> +       int i, j, n = req->data_length / sizeof(uint64_t);
>> +
>> +       for (i = 0, j = 0; i < n; i++) {
>> +               ec_index = local_ec_index(r->vinfo, oids[i]);
>> +               if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES)
>> +                       oids[j++] = oids[i];
>> +               else if (!sd_store->exist(oids[i], ec_index))
>> +                       oids[j++] = oids[i];
>> +       }
>> +
>> +       if (j > 0) {
>> +               rsp->data_length = sizeof(uint64_t) * j;
>> +               return SD_RES_NO_OBJ;
>> +       }
>> +
>> +       return SD_RES_SUCCESS;
>> +}
>> +
>>   static int local_cluster_info(const struct sd_req *req, struct sd_rsp
>> *rsp,
>>                               void *data)
>>   {
>> @@ -1408,6 +1432,13 @@ static struct sd_op_template sd_ops[] = {
>>                 .process_work = local_oid_exist,
>>         },
>>   +     [SD_OP_OIDS_EXIST] =  {
>> +               .name = "OIDS_EXIST",
>> +               .type = SD_OP_TYPE_LOCAL,
>> +               .force = true,
>> +               .process_main = local_oids_exist,
>> +       },
>> +
>>         [SD_OP_CLUSTER_INFO] = {
>>                 .name = "CLUSTER INFO",
>>                 .type = SD_OP_TYPE_LOCAL,
>
>
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list