[sheepdog] [PATCH v4] add vdi check -e to quickly check which objects are lost

Hitoshi Mitake mitake.hitoshi at gmail.com
Tue Jul 29 02:45:31 CEST 2014


At Mon, 28 Jul 2014 18:11:42 +0800,
Ruoyu wrote:
> 
> Sometimes we want to quickly check whether some of the vdi objects
> or data objects are lost due to an unexpected issue.
> 
> Although vdi check will do, it takes a lot of time because of
> too much client-server communication. Also, the probability of
> triggering automatic data fixing is quite low since the write path
> is strongly consistent.
> 
> Therefore, the new option -e (--exist) checks whether all the objects
> related to the vdi exist or not. It is fast because it submits
> the batched object ids only once per node. I think this is enough
> for the situation.
> 
> Usage: dog vdi check -e <vdiname>
> 
> Example:
> $ dog vdi check -e test
> test is fine, no object is missing.
> 
> $ dog vdi check -e ucweb
> [127.0.0.1:7001] oid 80b8071d00000000 is missing.
> [127.0.0.1:7001] oid 00b8071d000000ee is missing.
> ucweb lost 2 object(s).
> 
> v4 is rebased on the latest master and
> 
> 1. the help message is updated to tell the user that vdi check -e
>    neither compares nor repairs objects
> 2. the function do_obj_check is renamed as do_vdi_check_exist
> 3. a new command flag, SD_FLAG_CMD_FILTER, is introduced because
>    neither the read flag nor the write flag is quite appropriate
> 4. the value of SD_FLAG_CMD_FILTER is changed because the original one
>    is occupied
> 
> Signed-off-by: Ruoyu <liangry at ucweb.com>
> ---
>  dog/vdi.c                | 115 ++++++++++++++++++++++++++++++++++++++++++++++-
>  include/internal_proto.h |   9 ++++
>  include/sheep.h          |   6 +++
>  include/sheepdog_proto.h |   1 +
>  lib/net.c                |   3 ++
>  sheep/ops.c              |  31 +++++++++++++
>  6 files changed, 163 insertions(+), 2 deletions(-)

Applied, thanks.
Hitoshi

> 
> diff --git a/dog/vdi.c b/dog/vdi.c
> index 97ae63c..93ae763 100644
> --- a/dog/vdi.c
> +++ b/dog/vdi.c
> @@ -21,6 +21,8 @@
>  #include "sha1.h"
>  #include "fec.h"
>  
> +struct rb_root oid_tree = RB_ROOT;
> +
>  static struct sd_option vdi_options[] = {
>  	{'P', "prealloc", false, "preallocate all the data objects"},
>  	{'n', "no-share", false, "share nothing with its parent"},
> @@ -34,6 +36,8 @@ static struct sd_option vdi_options[] = {
>  	{'f', "force", false, "do operation forcibly"},
>  	{'y', "hyper", false, "create a hyper volume"},
>  	{'o', "oid", true, "specify the object id of the tracking object"},
> +	{'e', "exist", false, "only check objects exist or not,\n"
> +	 "                          neither comparing nor repairing"},
>  	{ 0, NULL, false, NULL },
>  };
>  
> @@ -53,6 +57,7 @@ static struct vdi_cmd_data {
>  	uint8_t store_policy;
>  	uint64_t oid;
>  	bool no_share;
> +	bool exist;
>  } vdi_cmd_data = { ~0, };
>  
>  struct get_vdi_info {
> @@ -985,6 +990,106 @@ out:
>  	return ret;
>  }
>  
> +#define OIDS_INIT_LENGTH 1024
> +
> +static void save_oid(uint64_t oid, int copies)
> +{
> +	const struct sd_vnode *vnodes[SD_MAX_COPIES];
> +	struct oid_entry *entry;
> +
> +	oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
> +	for (int i = 0; i < copies; i++) {
> +		struct oid_entry key = {
> +			.node = (struct sd_node *) vnodes[i]->node
> +		};
> +		entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp);
> +		if (!entry)
> +			panic("rb_search() failure.");
> +
> +		if (entry->last >= entry->end) {
> +			entry->end *= 2;
> +			entry->oids = xrealloc(entry->oids,
> +					sizeof(uint64_t) * entry->end);
> +		}
> +		entry->oids[entry->last] = oid;
> +		entry->last++;
> +	}
> +}
> +
> +static void build_oid_tree(const struct sd_inode *inode)
> +{
> +	uint32_t max_idx, vid;
> +	uint64_t oid;
> +	struct sd_node *node;
> +	struct oid_entry *entry;
> +	int copies = min((int)inode->nr_copies, sd_zones_nr);
> +
> +	rb_for_each_entry(node, &sd_nroot, rb) {
> +		entry = xmalloc(sizeof(*entry));
> +		entry->node = node;
> +		entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
> +		entry->end  = OIDS_INIT_LENGTH;
> +		entry->last = 0;
> +		rb_insert(&oid_tree, entry, rb, oid_entry_cmp);
> +	}
> +
> +	save_oid(vid_to_vdi_oid(inode->vdi_id), copies);
> +	max_idx = count_data_objs(inode);
> +	for (uint32_t idx = 0; idx < max_idx; idx++) {
> +		vid = sd_inode_get_vid(inode, idx);
> +		if (vid == 0)
> +			continue;
> +		oid = vid_to_data_oid(vid, idx);
> +		save_oid(oid, copies);
> +	}
> +}
> +
> +static void destroy_oid_tree(void)
> +{
> +	struct oid_entry *entry;
> +
> +	rb_for_each_entry(entry, &oid_tree, rb)
> +		free(entry->oids);
> +	rb_destroy(&oid_tree, struct oid_entry, rb);
> +}
> +
> +static int do_vdi_check_exist(const struct sd_inode *inode)
> +{
> +	int total = 0;
> +	struct oid_entry *entry;
> +	struct sd_req hdr;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +
> +	build_oid_tree(inode);
> +
> +	rb_for_each_entry(entry, &oid_tree, rb) {
> +		sd_init_req(&hdr, SD_OP_OIDS_EXIST);
> +		hdr.data_length = sizeof(uint64_t) * entry->last;
> +		hdr.flags = SD_FLAG_CMD_FILTER;
> +		int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
> +		if (ret < 0)
> +			panic("dog_exec_req() failure.");
> +
> +		int n = rsp->data_length / sizeof(uint64_t);
> +		total += n;
> +		for (int i = 0; i < n; i++)
> +			printf("[%s] oid %016"PRIx64" is missing.\n",
> +					addr_to_str(entry->node->nid.addr,
> +							entry->node->nid.port),
> +					entry->oids[i]);
> +	}
> +
> +	destroy_oid_tree();
> +
> +	if (total == 0) {
> +		printf("%s is fine, no object is missing.\n", inode->name);
> +		return EXIT_SUCCESS;
> +	} else {
> +		printf("%s lost %d object(s).\n", inode->name, total);
> +		return EXIT_FAILURE;
> +	}
> +}
> +
>  static int do_track_object(uint64_t oid, uint8_t nr_copies)
>  {
>  	int i, j, ret;
> @@ -1873,7 +1978,10 @@ static int vdi_check(int argc, char **argv)
>  		goto out;
>  	}
>  
> -	ret = do_vdi_check(inode);
> +	if (vdi_cmd_data.exist)
> +		ret = do_vdi_check_exist(inode);
> +	else
> +		ret = do_vdi_check(inode);
>  out:
>  	free(inode);
>  	return ret;
> @@ -2591,7 +2699,7 @@ static int vdi_alter_copy(int argc, char **argv)
>  }
>  
>  static struct subcommand vdi_cmd[] = {
> -	{"check", "<vdiname>", "sapht", "check and repair image's consistency",
> +	{"check", "<vdiname>", "seapht", "check and repair image's consistency",
>  	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
>  	 vdi_check, vdi_options},
>  	{"create", "<vdiname> <size>", "Pycaphrvt", "create an image",
> @@ -2735,6 +2843,9 @@ static int vdi_parser(int ch, const char *opt)
>  			exit(EXIT_FAILURE);
>  		}
>  		break;
> +	case 'e':
> +		vdi_cmd_data.exist = true;
> +		break;
>  	}
>  
>  	return 0;
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index 2affc42..37afb46 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -107,6 +107,7 @@
>  #define SD_OP_PREVENT_INODE_UPDATE    0xC3
>  #define SD_OP_ALLOW_INODE_UPDATE      0xC4
>  #define SD_OP_REPAIR_REPLICA	0xC5
> +#define SD_OP_OIDS_EXIST	0xC6
>  
>  /* internal flags for hdr.flags, must be above 0x80 */
>  #define SD_FLAG_CMD_RECOVERY 0x0080
> @@ -180,6 +181,14 @@ struct sd_node {
>  #endif
>  };
>  
> +struct oid_entry {
> +	struct rb_node rb;
> +	struct sd_node *node; /* key */
> +	uint64_t *oids;       /* object id array */
> +	int end;              /* idx to the end of the allocated oid array */
> +	int last;             /* idx to the last element of the oid array */
> +};
> +
>  /*
>   * A joining sheep multicasts the local cluster info.  Then, the existing nodes
>   * reply the latest cluster info which is unique among all of the nodes.
> diff --git a/include/sheep.h b/include/sheep.h
> index e062372..5b136a8 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
>  	return node_id_cmp(&node1->nid, &node2->nid);
>  }
>  
> +static inline int oid_entry_cmp(const struct oid_entry *entry1,
> +			   const struct oid_entry *entry2)
> +{
> +	return node_cmp(entry1->node, entry2->node);
> +}
> +
>  static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
>  {
>  	return node_cmp(a, b) == 0;
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index d6a8d35..b4e1e13 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -50,6 +50,7 @@
>  #define SD_FLAG_CMD_COW      0x02
>  #define SD_FLAG_CMD_CACHE    0x04
>  #define SD_FLAG_CMD_DIRECT   0x08 /* don't use object cache */
> +#define SD_FLAG_CMD_FILTER   0x11 /* write & read, output is subset of input */
>  /* flags above 0x80 are sheepdog-internal */
>  
>  #define SD_RES_SUCCESS       0x00 /* Success */
> diff --git a/lib/net.c b/lib/net.c
> index b32e022..552e945 100644
> --- a/lib/net.c
> +++ b/lib/net.c
> @@ -334,6 +334,9 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data,
>  	if (hdr->flags & SD_FLAG_CMD_WRITE) {
>  		wlen = hdr->data_length;
>  		rlen = 0;
> +	} else if (hdr->flags & SD_FLAG_CMD_FILTER) {
> +		wlen = hdr->data_length;
> +		rlen = hdr->data_length;
>  	} else {
>  		wlen = 0;
>  		rlen = hdr->data_length;
> diff --git a/sheep/ops.c b/sheep/ops.c
> index dc10f0f..3d20c7d 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -1056,6 +1056,30 @@ static int local_oid_exist(struct request *req)
>  	return SD_RES_NO_OBJ;
>  }
>  
> +static int local_oids_exist(const struct sd_req *req, struct sd_rsp *rsp,
> +			      void *data)
> +{
> +	struct request *r = container_of(req, struct request, rq);
> +	uint64_t *oids = (uint64_t *) data;
> +	uint8_t ec_index;
> +	int i, j, n = req->data_length / sizeof(uint64_t);
> +
> +	for (i = 0, j = 0; i < n; i++) {
> +		ec_index = local_ec_index(r->vinfo, oids[i]);
> +		if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES)
> +			oids[j++] = oids[i];
> +		else if (!sd_store->exist(oids[i], ec_index))
> +			oids[j++] = oids[i];
> +	}
> +
> +	if (j > 0) {
> +		rsp->data_length = sizeof(uint64_t) * j;
> +		return SD_RES_NO_OBJ;
> +	}
> +
> +	return SD_RES_SUCCESS;
> +}
> +
>  static int local_cluster_info(const struct sd_req *req, struct sd_rsp *rsp,
>  			      void *data)
>  {
> @@ -1594,6 +1618,13 @@ static struct sd_op_template sd_ops[] = {
>  		.process_work = local_oid_exist,
>  	},
>  
> +	[SD_OP_OIDS_EXIST] =  {
> +		.name = "OIDS_EXIST",
> +		.type = SD_OP_TYPE_LOCAL,
> +		.force = true,
> +		.process_main = local_oids_exist,
> +	},
> +
>  	[SD_OP_CLUSTER_INFO] = {
>  		.name = "CLUSTER INFO",
>  		.type = SD_OP_TYPE_LOCAL,
> -- 
> 1.8.3.2
> 
> 
> -- 
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog



More information about the sheepdog mailing list