[sheepdog] [PATCH v3] add vdi check -e to quickly check which objects are lost.
Ruoyu
liangry at ucweb.com
Tue Jul 1 03:51:34 CEST 2014
What do you think of this patch, setting aside the choice of opcode?
The SD_OP_OIDS_EXIST request needs a new kind of operation that both
writes data to and reads data back from the server. Is there a better
way to achieve that?
On 2014年05月28日 17:48, Ruoyu wrote:
> Sometimes we want to quickly check whether some of the vdi objects
> or data objects have been lost due to an unexpected issue.
>
> Although vdi check will do, it takes a lot of time because of
> too many client-server round trips. The probability of actually
> triggering automatic data repair is also quite low, since writes
> are strongly consistent.
>
> Therefore, the new option -e (--exist) only checks whether all the
> objects related to the vdi exist. It is fast because it submits the
> object ids in a single batch per node. I think this is enough for
> that situation.
>
> Usage: dog vdi check -e <vdiname>
>
> Example:
> $ dog vdi check -e test
> test is fine, no object is missing.
>
> $ dog vdi check -e ucweb
> [127.0.0.1:7001] oid 80b8071d00000000 is missing.
> [127.0.0.1:7001] oid 00b8071d000000ee is missing.
> ucweb lost 2 object(s).
>
> Signed-off-by: Ruoyu <liangry at ucweb.com>
> ---
> dog/vdi.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++-
> include/internal_proto.h | 9 ++++
> include/sheep.h | 6 +++
> include/sheepdog_proto.h | 1 +
> lib/net.c | 2 +-
> sheep/ops.c | 31 +++++++++++++
> 6 files changed, 160 insertions(+), 3 deletions(-)
>
> diff --git a/dog/vdi.c b/dog/vdi.c
> index 866cb36..45fe6a9 100644
> --- a/dog/vdi.c
> +++ b/dog/vdi.c
> @@ -21,6 +21,8 @@
> #include "sha1.h"
> #include "fec.h"
>
> +struct rb_root oid_tree = RB_ROOT;
> +
> static struct sd_option vdi_options[] = {
> {'P', "prealloc", false, "preallocate all the data objects"},
> {'n', "no-share", false, "share nothing with its parent"},
> @@ -34,6 +36,7 @@ static struct sd_option vdi_options[] = {
> {'f', "force", false, "do operation forcibly"},
> {'y', "hyper", false, "create a hyper volume"},
> {'o', "oid", true, "specify the object id of the tracking object"},
> + {'e', "exist", false, "check objects exist or not only, no repairing"},
> { 0, NULL, false, NULL },
> };
>
> @@ -53,6 +56,7 @@ static struct vdi_cmd_data {
> uint8_t store_policy;
> uint64_t oid;
> bool no_share;
> + bool exist;
> } vdi_cmd_data = { ~0, };
>
> struct get_vdi_info {
> @@ -875,6 +879,106 @@ out:
> return ret;
> }
>
> +#define OIDS_INIT_LENGTH 1024
> +
> +static void save_oid(uint64_t oid, int copies)
> +{
> + const struct sd_vnode *vnodes[SD_MAX_COPIES];
> + struct oid_entry *entry;
> +
> + oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
> + for (int i = 0; i < copies; i++) {
> + struct oid_entry key = {
> + .node = (struct sd_node *) vnodes[i]->node
> + };
> + entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp);
> + if (!entry)
> + panic("rb_search() failure.");
> +
> + if (entry->last >= entry->end) {
> + entry->end *= 2;
> + entry->oids = xrealloc(entry->oids,
> + sizeof(uint64_t) * entry->end);
> + }
> + entry->oids[entry->last] = oid;
> + entry->last++;
> + }
> +}
> +
> +static void build_oid_tree(const struct sd_inode *inode)
> +{
> + uint32_t max_idx, vid;
> + uint64_t oid;
> + struct sd_node *node;
> + struct oid_entry *entry;
> + int copies = min((int)inode->nr_copies, sd_zones_nr);
> +
> + rb_for_each_entry(node, &sd_nroot, rb) {
> + entry = xmalloc(sizeof(*entry));
> + entry->node = node;
> + entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
> + entry->end = OIDS_INIT_LENGTH;
> + entry->last = 0;
> + rb_insert(&oid_tree, entry, rb, oid_entry_cmp);
> + }
> +
> + save_oid(vid_to_vdi_oid(inode->vdi_id), copies);
> + max_idx = count_data_objs(inode);
> + for (uint32_t idx = 0; idx < max_idx; idx++) {
> + vid = sd_inode_get_vid(inode, idx);
> + if (vid == 0)
> + continue;
> + oid = vid_to_data_oid(vid, idx);
> + save_oid(oid, copies);
> + }
> +}
> +
> +static void destroy_oid_tree(void)
> +{
> + struct oid_entry *entry;
> +
> + rb_for_each_entry(entry, &oid_tree, rb)
> + free(entry->oids);
> + rb_destroy(&oid_tree, struct oid_entry, rb);
> +}
> +
> +static int do_obj_check(const struct sd_inode *inode)
> +{
> + int total = 0;
> + struct oid_entry *entry;
> + struct sd_req hdr;
> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +
> + build_oid_tree(inode);
> +
> + rb_for_each_entry(entry, &oid_tree, rb) {
> + sd_init_req(&hdr, SD_OP_OIDS_EXIST);
> + hdr.data_length = sizeof(uint64_t) * entry->last;
> + hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ;
> + int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
> + if (ret < 0)
> + panic("dog_exec_req() failure.");
> +
> + int n = rsp->data_length / sizeof(uint64_t);
> + total += n;
> + for (int i = 0; i < n; i++)
> + printf("[%s] oid %016"PRIx64" is missing.\n",
> + addr_to_str(entry->node->nid.addr,
> + entry->node->nid.port),
> + entry->oids[i]);
> + }
> +
> + destroy_oid_tree();
> +
> + if (total == 0) {
> + printf("%s is fine, no object is missing.\n", inode->name);
> + return EXIT_SUCCESS;
> + } else {
> + printf("%s lost %d object(s).\n", inode->name, total);
> + return EXIT_FAILURE;
> + }
> +}
> +
> static int do_track_object(uint64_t oid, uint8_t nr_copies)
> {
> int i, j, ret;
> @@ -1771,7 +1875,10 @@ static int vdi_check(int argc, char **argv)
> goto out;
> }
>
> - ret = do_vdi_check(inode);
> + if (vdi_cmd_data.exist)
> + ret = do_obj_check(inode);
> + else
> + ret = do_vdi_check(inode);
> out:
> free(inode);
> return ret;
> @@ -2359,7 +2466,7 @@ static int vdi_cache(int argc, char **argv)
> }
>
> static struct subcommand vdi_cmd[] = {
> - {"check", "<vdiname>", "saph", "check and repair image's consistency",
> + {"check", "<vdiname>", "seaph", "check and repair image's consistency",
> NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
> vdi_check, vdi_options},
> {"create", "<vdiname> <size>", "Pycaphrv", "create an image",
> @@ -2491,6 +2598,9 @@ static int vdi_parser(int ch, const char *opt)
> exit(EXIT_FAILURE);
> }
> break;
> + case 'e':
> + vdi_cmd_data.exist = true;
> + break;
> }
>
> return 0;
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index ada084f..9d3928e 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -100,6 +100,7 @@
> #define SD_OP_NFS_DELETE 0xBC
> #define SD_OP_EXIST 0xBD
> #define SD_OP_CLUSTER_INFO 0xBE
> +#define SD_OP_OIDS_EXIST 0xBF
> #define SD_OP_ALTER_CLUSTER_COPY 0xC0
> #define SD_OP_ALTER_VDI_COPY 0xC1
>
> @@ -175,6 +176,14 @@ struct sd_node {
> #endif
> };
>
> +struct oid_entry {
> + struct rb_node rb;
> + struct sd_node *node; /* key */
> + uint64_t *oids; /* object id array */
> + int end; /* idx to the end of the allocated oid array */
> + int last; /* idx to the last element of the oid array */
> +};
> +
> /*
> * A joining sheep multicasts the local cluster info. Then, the existing nodes
> * reply the latest cluster info which is unique among all of the nodes.
> diff --git a/include/sheep.h b/include/sheep.h
> index 785883e..ef8958c 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
> return node_id_cmp(&node1->nid, &node2->nid);
> }
>
> +static inline int oid_entry_cmp(const struct oid_entry *entry1,
> + const struct oid_entry *entry2)
> +{
> + return node_cmp(entry1->node, entry2->node);
> +}
> +
> static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
> {
> return node_cmp(a, b) == 0;
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 9361bad..c6e21b4 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -45,6 +45,7 @@
> #define SD_FLAG_CMD_COW 0x02
> #define SD_FLAG_CMD_CACHE 0x04
> #define SD_FLAG_CMD_DIRECT 0x08 /* don't use object cache */
> +#define SD_FLAG_CMD_READ 0x10
> /* flags above 0x80 are sheepdog-internal */
>
> #define SD_RES_SUCCESS 0x00 /* Success */
> diff --git a/lib/net.c b/lib/net.c
> index b32e022..c2d86cb 100644
> --- a/lib/net.c
> +++ b/lib/net.c
> @@ -333,7 +333,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data,
>
> if (hdr->flags & SD_FLAG_CMD_WRITE) {
> wlen = hdr->data_length;
> - rlen = 0;
> + rlen = (hdr->flags & SD_FLAG_CMD_READ) ? hdr->data_length : 0;
> } else {
> wlen = 0;
> rlen = hdr->data_length;
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 61eb37f..c54017e 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -1058,6 +1058,30 @@ static int local_oid_exist(struct request *req)
> return SD_RES_NO_OBJ;
> }
>
> +static int local_oids_exist(const struct sd_req *req, struct sd_rsp *rsp,
> + void *data)
> +{
> + struct request *r = container_of(req, struct request, rq);
> + uint64_t *oids = (uint64_t *) data;
> + uint8_t ec_index;
> + int i, j, n = req->data_length / sizeof(uint64_t);
> +
> + for (i = 0, j = 0; i < n; i++) {
> + ec_index = local_ec_index(r->vinfo, oids[i]);
> + if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES)
> + oids[j++] = oids[i];
> + else if (!sd_store->exist(oids[i], ec_index))
> + oids[j++] = oids[i];
> + }
> +
> + if (j > 0) {
> + rsp->data_length = sizeof(uint64_t) * j;
> + return SD_RES_NO_OBJ;
> + }
> +
> + return SD_RES_SUCCESS;
> +}
> +
> static int local_cluster_info(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> @@ -1408,6 +1432,13 @@ static struct sd_op_template sd_ops[] = {
> .process_work = local_oid_exist,
> },
>
> + [SD_OP_OIDS_EXIST] = {
> + .name = "OIDS_EXIST",
> + .type = SD_OP_TYPE_LOCAL,
> + .force = true,
> + .process_main = local_oids_exist,
> + },
> +
> [SD_OP_CLUSTER_INFO] = {
> .name = "CLUSTER INFO",
> .type = SD_OP_TYPE_LOCAL,
More information about the sheepdog
mailing list