[sheepdog] [PATCH] add dog vdi object exist command to check which objects are lost.

Ruoyu liangry at ucweb.com
Fri May 23 03:46:59 CEST 2014


On 2014年05月22日 21:30, Liu Yuan wrote:
> On Thu, May 22, 2014 at 07:56:06PM +0800, Ruoyu wrote:
>> Sometimes we want to quickly check whether some of the vdi objects
>> or data objects have been lost due to an unexpected issue.
>>
>> Although dog vdi check will do, it takes a lot of time because of
>> too much client-server communication. And the probability of
>> triggering automatic data fixing is quite low since the writing process
>> is strongly consistent.
>>
>> Therefore, the new command checks whether all the objects related to
>> the vdi exist or not. It is fast because it submits the
>> batched object ids only once per node. I think this is enough
>> for the situation.
>>
>> Usage: dog vdi object exist <vdiname>
> This would be confusing with 'dog vdi object location'. I think
>
> 'dog vdi check --light' is a better interface.
>
> Besides, I think you can check SD_OP_EXIST operation for reference, you don't
> need
Yes. So do I.
>
>> Example:
>> $ dog vdi object exist test
>> test is fine, no object is missing.
>>
>> $ dog vdi object exist ucweb
>> [127.0.0.1:7001] oid 80b8071d00000000 is missing.
>> [127.0.0.1:7001] oid 00b8071d000000ee is missing.
>> ucweb lost 2 object(s).
>>
>> Signed-off-by: Ruoyu <liangry at ucweb.com>
>> ---
>>   dog/vdi.c                | 125 +++++++++++++++++++++++++++++++++++++++++++++++
>>   include/internal_proto.h |   9 ++++
>>   include/sheep.h          |   6 +++
>>   include/sheepdog_proto.h |   1 +
>>   lib/net.c                |   2 +-
>>   sheep/ops.c              |  31 ++++++++++++
>>   6 files changed, 173 insertions(+), 1 deletion(-)
>>
>> diff --git a/dog/vdi.c b/dog/vdi.c
>> index 9c34bfb..defb5bd 100644
>> --- a/dog/vdi.c
>> +++ b/dog/vdi.c
>> @@ -21,6 +21,8 @@
>>   #include "sha1.h"
>>   #include "fec.h"
>>   
>> +struct rb_root node_oids_root = RB_ROOT;
>> +
>>   static struct sd_option vdi_options[] = {
>>   	{'P', "prealloc", false, "preallocate all the data objects"},
>>   	{'i', "index", true, "specify the index of data objects"},
>> @@ -866,6 +868,127 @@ out:
>>   	return ret;
>>   }
>>   
>> +#define OIDS_INIT_LENGTH 1024
>> +
>> +static void store_oid(uint64_t oid, int copies)
>> +{
>> +	const struct sd_vnode *vnodes[SD_MAX_COPIES];
>> +	struct node_oids_entry *entry;
>> +
>> +	oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
>> +	for (int i = 0; i < copies; i++) {
>> +		struct node_oids_entry key = {
>> +			.node = (struct sd_node *) vnodes[i]->node
>> +		};
>> +		entry = rb_search(&node_oids_root, &key,
>> +				rb, node_oids_entry_cmp);
>> +		if (!entry)
>> +			panic("rb_search() failure.");
>> +
>> +		if (entry->pos >= entry->last) {
>> +			entry->last *= 2;
>> +			entry->oids = xrealloc(entry->oids,
>> +					sizeof(uint64_t) * entry->last);
>> +		}
>> +		entry->oids[entry->pos] = oid;
>> +		entry->pos++;
>> +	}
>> +}
>> +
>> +static void init_node_oids_tree(const struct sd_inode *inode)
>> +{
>> +	uint32_t max_idx, vid;
>> +	uint64_t oid;
>> +	struct sd_node *node;
>> +	struct node_oids_entry *entry;
>> +	int nr_copies = min((int)inode->nr_copies, sd_zones_nr);
>> +
>> +	rb_for_each_entry(node, &sd_nroot, rb) {
>> +		entry = xmalloc(sizeof(*entry));
>> +		entry->node = node;
>> +		entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
>> +		entry->last = OIDS_INIT_LENGTH;
>> +		entry->pos = 0;
>> +		rb_insert(&node_oids_root, entry, rb, node_oids_entry_cmp);
>> +	}
>> +
>> +	store_oid(vid_to_vdi_oid(inode->vdi_id), nr_copies);
>> +	max_idx = count_data_objs(inode);
>> +	for (uint32_t idx = 0; idx < max_idx; idx++) {
>> +		vid = sd_inode_get_vid(inode, idx);
>> +		if (vid == 0)
>> +			continue;
>> +		oid = vid_to_data_oid(vid, idx);
>> +		store_oid(oid, nr_copies);
>> +	}
>> +}
>> +
>> +static void destroy_node_oids_tree(void)
>> +{
>> +	struct node_oids_entry *entry;
>> +
>> +	rb_for_each_entry(entry, &node_oids_root, rb)
>> +		free(entry->oids);
>> +	rb_destroy(&node_oids_root, struct sd_node, rb);
>> +}
>> +
>> +static int do_vdi_object_exist(const struct sd_inode *inode)
>> +{
>> +	int total = 0;
>> +	struct node_oids_entry *entry;
>> +	struct sd_req hdr;
>> +	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>> +
>> +	init_node_oids_tree(inode);
>> +
>> +	rb_for_each_entry(entry, &node_oids_root, rb) {
>> +		sd_init_req(&hdr, SD_OP_OIDS_EXIST);
>> +		hdr.data_length = sizeof(uint64_t) * entry->pos;
>> +		hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ;
>> +		int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
>> +		if (ret < 0)
>> +			panic("dog_exec_req() failure.");
>> +
>> +		int n = rsp->data_length / sizeof(uint64_t);
>> +		total += n;
>> +		for (int i = 0; i < n; i++)
>> +			printf("[%s] oid %016"PRIx64" is missing.\n",
>> +					addr_to_str(entry->node->nid.addr,
>> +							entry->node->nid.port),
>> +					entry->oids[i]);
>> +	}
>> +
>> +	destroy_node_oids_tree();
>> +	return total;
>> +}
>> +
>> +static int vdi_object_exist(int argc, char **argv)
>> +{
>> +	const char *vdiname = argv[optind++];
>> +	int total, ret;
>> +	struct sd_inode *inode = xmalloc(sizeof(*inode));
>> +
>> +	ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id,
>> +			   vdi_cmd_data.snapshot_tag, NULL, inode,
>> +			   SD_INODE_SIZE);
>> +	if (ret != EXIT_SUCCESS) {
>> +		sd_err("FATAL: no inode objects");
>> +		goto out;
>> +	}
>> +
>> +	total = do_vdi_object_exist(inode);
>> +	if (total == 0) {
>> +		printf("%s is fine, no object is missing.\n", vdiname);
>> +		ret = EXIT_SUCCESS;
>> +	} else {
>> +		printf("%s lost %d object(s).\n", vdiname, total);
>> +		ret = EXIT_FAILURE;
>> +	}
>> +out:
>> +	free(inode);
>> +	return ret;
>> +}
>> +
>>   static int do_track_object(uint64_t oid, uint8_t nr_copies)
>>   {
>>   	int i, j, ret;
>> @@ -2323,6 +2446,8 @@ static struct subcommand vdi_object_cmd[] = {
>>   	 NULL, CMD_NEED_ARG, vdi_object_map},
>>   	{"dump-inode", NULL, NULL, "dump inode information",
>>   	 NULL, CMD_NEED_ARG, vdi_object_dump_inode},
>> +	{"exist", NULL, NULL, "show which objects are missing",
>> +	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_object_exist},
>>   	{NULL},
>>   };
>>   
>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>> index 73ed581..53b5b00 100644
>> --- a/include/internal_proto.h
>> +++ b/include/internal_proto.h
>> @@ -100,6 +100,7 @@
>>   #define SD_OP_NFS_DELETE	0xBC
>>   #define SD_OP_EXIST	0xBD
>>   #define SD_OP_CLUSTER_INFO	0xBE
>> +#define SD_OP_OIDS_EXIST	0xBF
>>   
>>   /* internal flags for hdr.flags, must be above 0x80 */
>>   #define SD_FLAG_CMD_RECOVERY 0x0080
>> @@ -173,6 +174,14 @@ struct sd_node {
>>   #endif
>>   };
>>   
>> +struct node_oids_entry {
>> +	struct rb_node rb;
>> +	struct sd_node *node;
>> +	uint64_t *oids;
>> +	int last;
>> +	int pos;
>> +};
>> +
>>   /*
>>    * A joining sheep multicasts the local cluster info.  Then, the existing nodes
>>    * reply the latest cluster info which is unique among all of the nodes.
>> diff --git a/include/sheep.h b/include/sheep.h
>> index 785883e..9b56dd8 100644
>> --- a/include/sheep.h
>> +++ b/include/sheep.h
>> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
>>   	return node_id_cmp(&node1->nid, &node2->nid);
>>   }
>>   
>> +static inline int node_oids_entry_cmp(const struct node_oids_entry *entry1,
>> +			   const struct node_oids_entry *entry2)
>> +{
>> +	return node_cmp(entry1->node, entry2->node);
>> +}
>> +
>>   static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
>>   {
>>   	return node_cmp(a, b) == 0;
>> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
>> index 9361bad..c6e21b4 100644
>> --- a/include/sheepdog_proto.h
>> +++ b/include/sheepdog_proto.h
>> @@ -45,6 +45,7 @@
>>   #define SD_FLAG_CMD_COW      0x02
>>   #define SD_FLAG_CMD_CACHE    0x04
>>   #define SD_FLAG_CMD_DIRECT   0x08 /* don't use object cache */
>> +#define SD_FLAG_CMD_READ     0x10
> Why do you need this SD_FLAG_CMD_READ?
Is there a flag that stands for both write and read? The new command needs it
because it sends the array of oids to be checked and receives the array of lost oids.
Therefore, I modified the function exec_req() as below.

--- a/lib/net.c
+++ b/lib/net.c
@@ -333,7 +333,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data,
  
  	if (hdr->flags & SD_FLAG_CMD_WRITE) {
  		wlen = hdr->data_length;
-		rlen = 0;
+		rlen = (hdr->flags & SD_FLAG_CMD_READ) ? hdr->data_length : 0;
  	} else {
  		wlen = 0;
  		rlen = hdr->data_length;


>
> Thanks
> Yuan





More information about the sheepdog mailing list