[sheepdog] [PATCH] add dog vdi object exist command to check which objects are lost.

Liu Yuan namei.unix at gmail.com
Thu May 22 15:30:01 CEST 2014


On Thu, May 22, 2014 at 07:56:06PM +0800, Ruoyu wrote:
> Sometimes we want to quickly check whether some of the vdi objects
> or data objects are lost due to unexpected issue.
> 
> Although dog vdi check will do, it takes a lot of time because of
> too much client-server communication. And the probability of
> triggering automatic data fixing is quite low since the write path
> is strongly consistent.
> 
> Therefore, the new command checks whether all the objects related to
> the vdi exist or not. It is fast because it submits the
> batched object ids only once per node. I think this is enough
> for the situation.
> 
> Usage: dog vdi object exist <vdiname>

This could be confused with 'dog vdi object location'. I think

'dog vdi check --light' is a better interface.

Besides, I think you can check the SD_OP_EXIST operation for reference; you don't
need [the sentence is cut off here in the original message]

> 
> Example:
> $ dog vdi object exist test
> test is fine, no object is missing.
> 
> $ dog vdi object exist ucweb
> [127.0.0.1:7001] oid 80b8071d00000000 is missing.
> [127.0.0.1:7001] oid 00b8071d000000ee is missing.
> ucweb lost 2 object(s).
> 
> Signed-off-by: Ruoyu <liangry at ucweb.com>
> ---
>  dog/vdi.c                | 125 +++++++++++++++++++++++++++++++++++++++++++++++
>  include/internal_proto.h |   9 ++++
>  include/sheep.h          |   6 +++
>  include/sheepdog_proto.h |   1 +
>  lib/net.c                |   2 +-
>  sheep/ops.c              |  31 ++++++++++++
>  6 files changed, 173 insertions(+), 1 deletion(-)
> 
> diff --git a/dog/vdi.c b/dog/vdi.c
> index 9c34bfb..defb5bd 100644
> --- a/dog/vdi.c
> +++ b/dog/vdi.c
> @@ -21,6 +21,8 @@
>  #include "sha1.h"
>  #include "fec.h"
>  
> +struct rb_root node_oids_root = RB_ROOT;
> +
>  static struct sd_option vdi_options[] = {
>  	{'P', "prealloc", false, "preallocate all the data objects"},
>  	{'i', "index", true, "specify the index of data objects"},
> @@ -866,6 +868,127 @@ out:
>  	return ret;
>  }
>  
> +#define OIDS_INIT_LENGTH 1024
> +
> +static void store_oid(uint64_t oid, int copies)
> +{
> +	const struct sd_vnode *vnodes[SD_MAX_COPIES];
> +	struct node_oids_entry *entry;
> +
> +	oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
> +	for (int i = 0; i < copies; i++) {
> +		struct node_oids_entry key = {
> +			.node = (struct sd_node *) vnodes[i]->node
> +		};
> +		entry = rb_search(&node_oids_root, &key,
> +				rb, node_oids_entry_cmp);
> +		if (!entry)
> +			panic("rb_search() failure.");
> +
> +		if (entry->pos >= entry->last) {
> +			entry->last *= 2;
> +			entry->oids = xrealloc(entry->oids,
> +					sizeof(uint64_t) * entry->last);
> +		}
> +		entry->oids[entry->pos] = oid;
> +		entry->pos++;
> +	}
> +}
> +
> +static void init_node_oids_tree(const struct sd_inode *inode)
> +{
> +	uint32_t max_idx, vid;
> +	uint64_t oid;
> +	struct sd_node *node;
> +	struct node_oids_entry *entry;
> +	int nr_copies = min((int)inode->nr_copies, sd_zones_nr);
> +
> +	rb_for_each_entry(node, &sd_nroot, rb) {
> +		entry = xmalloc(sizeof(*entry));
> +		entry->node = node;
> +		entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
> +		entry->last = OIDS_INIT_LENGTH;
> +		entry->pos = 0;
> +		rb_insert(&node_oids_root, entry, rb, node_oids_entry_cmp);
> +	}
> +
> +	store_oid(vid_to_vdi_oid(inode->vdi_id), nr_copies);
> +	max_idx = count_data_objs(inode);
> +	for (uint32_t idx = 0; idx < max_idx; idx++) {
> +		vid = sd_inode_get_vid(inode, idx);
> +		if (vid == 0)
> +			continue;
> +		oid = vid_to_data_oid(vid, idx);
> +		store_oid(oid, nr_copies);
> +	}
> +}
> +
> +static void destroy_node_oids_tree(void)
> +{
> +	struct node_oids_entry *entry;
> +
> +	rb_for_each_entry(entry, &node_oids_root, rb)
> +		free(entry->oids);
> +	rb_destroy(&node_oids_root, struct sd_node, rb);
> +}
> +
> +static int do_vdi_object_exist(const struct sd_inode *inode)
> +{
> +	int total = 0;
> +	struct node_oids_entry *entry;
> +	struct sd_req hdr;
> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
> +
> +	init_node_oids_tree(inode);
> +
> +	rb_for_each_entry(entry, &node_oids_root, rb) {
> +		sd_init_req(&hdr, SD_OP_OIDS_EXIST);
> +		hdr.data_length = sizeof(uint64_t) * entry->pos;
> +		hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ;
> +		int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
> +		if (ret < 0)
> +			panic("dog_exec_req() failure.");
> +
> +		int n = rsp->data_length / sizeof(uint64_t);
> +		total += n;
> +		for (int i = 0; i < n; i++)
> +			printf("[%s] oid %016"PRIx64" is missing.\n",
> +					addr_to_str(entry->node->nid.addr,
> +							entry->node->nid.port),
> +					entry->oids[i]);
> +	}
> +
> +	destroy_node_oids_tree();
> +	return total;
> +}
> +
> +static int vdi_object_exist(int argc, char **argv)
> +{
> +	const char *vdiname = argv[optind++];
> +	int total, ret;
> +	struct sd_inode *inode = xmalloc(sizeof(*inode));
> +
> +	ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id,
> +			   vdi_cmd_data.snapshot_tag, NULL, inode,
> +			   SD_INODE_SIZE);
> +	if (ret != EXIT_SUCCESS) {
> +		sd_err("FATAL: no inode objects");
> +		goto out;
> +	}
> +
> +	total = do_vdi_object_exist(inode);
> +	if (total == 0) {
> +		printf("%s is fine, no object is missing.\n", vdiname);
> +		ret = EXIT_SUCCESS;
> +	} else {
> +		printf("%s lost %d object(s).\n", vdiname, total);
> +		ret = EXIT_FAILURE;
> +	}
> +out:
> +	free(inode);
> +	return ret;
> +}
> +
>  static int do_track_object(uint64_t oid, uint8_t nr_copies)
>  {
>  	int i, j, ret;
> @@ -2323,6 +2446,8 @@ static struct subcommand vdi_object_cmd[] = {
>  	 NULL, CMD_NEED_ARG, vdi_object_map},
>  	{"dump-inode", NULL, NULL, "dump inode information",
>  	 NULL, CMD_NEED_ARG, vdi_object_dump_inode},
> +	{"exist", NULL, NULL, "show which objects are missing",
> +	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_object_exist},
>  	{NULL},
>  };
>  
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index 73ed581..53b5b00 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -100,6 +100,7 @@
>  #define SD_OP_NFS_DELETE	0xBC
>  #define SD_OP_EXIST	0xBD
>  #define SD_OP_CLUSTER_INFO	0xBE
> +#define SD_OP_OIDS_EXIST	0xBF
>  
>  /* internal flags for hdr.flags, must be above 0x80 */
>  #define SD_FLAG_CMD_RECOVERY 0x0080
> @@ -173,6 +174,14 @@ struct sd_node {
>  #endif
>  };
>  
> +struct node_oids_entry {
> +	struct rb_node rb;
> +	struct sd_node *node;
> +	uint64_t *oids;
> +	int last;
> +	int pos;
> +};
> +
>  /*
>   * A joining sheep multicasts the local cluster info.  Then, the existing nodes
>   * reply the latest cluster info which is unique among all of the nodes.
> diff --git a/include/sheep.h b/include/sheep.h
> index 785883e..9b56dd8 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
>  	return node_id_cmp(&node1->nid, &node2->nid);
>  }
>  
> +static inline int node_oids_entry_cmp(const struct node_oids_entry *entry1,
> +			   const struct node_oids_entry *entry2)
> +{
> +	return node_cmp(entry1->node, entry2->node);
> +}
> +
>  static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
>  {
>  	return node_cmp(a, b) == 0;
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index 9361bad..c6e21b4 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -45,6 +45,7 @@
>  #define SD_FLAG_CMD_COW      0x02
>  #define SD_FLAG_CMD_CACHE    0x04
>  #define SD_FLAG_CMD_DIRECT   0x08 /* don't use object cache */
> +#define SD_FLAG_CMD_READ     0x10

Why do you need this SD_FLAG_CMD_READ?

Thanks
Yuan



More information about the sheepdog mailing list