[sheepdog] [PATCH] add dog vdi object exist command to check which objects are lost.
Ruoyu
liangry at ucweb.com
Fri May 23 03:46:59 CEST 2014
On 2014年05月22日 21:30, Liu Yuan wrote:
> On Thu, May 22, 2014 at 07:56:06PM +0800, Ruoyu wrote:
>> Sometimes we want to quickly check whether some of the vdi objects
>> or data objects have been lost due to an unexpected issue.
>>
>> Although 'dog vdi check' will do, it takes a lot of time because of
>> the many client-server round trips. Moreover, the probability of
>> triggering automatic data fixing is quite low, since writes are
>> strongly consistent.
>>
>> Therefore, the new command checks whether all the objects related to
>> the vdi exist. It is fast because it submits the batched object ids
>> only once per node. I think this is enough
>> for this situation.
>>
>> Usage: dog vdi object exist <vdiname>
> This would be confusing with 'dog vdi object location'. I think
>
> 'dog vdi check --light' is a better interface.
>
> Besides, I think you can check SD_OP_EXIST operation for reference, you don't
> need
Yes, I think so too.
>
>> Example:
>> $ dog vdi object exist test
>> test is fine, no object is missing.
>>
>> $ dog vdi object exist ucweb
>> [127.0.0.1:7001] oid 80b8071d00000000 is missing.
>> [127.0.0.1:7001] oid 00b8071d000000ee is missing.
>> ucweb lost 2 object(s).
>>
>> Signed-off-by: Ruoyu <liangry at ucweb.com>
>> ---
>> dog/vdi.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++
>> include/internal_proto.h | 9 ++++
>> include/sheep.h | 6 +++
>> include/sheepdog_proto.h | 1 +
>> lib/net.c | 2 +-
>> sheep/ops.c | 31 ++++++++++++
>> 6 files changed, 173 insertions(+), 1 deletion(-)
>>
>> diff --git a/dog/vdi.c b/dog/vdi.c
>> index 9c34bfb..defb5bd 100644
>> --- a/dog/vdi.c
>> +++ b/dog/vdi.c
>> @@ -21,6 +21,8 @@
>> #include "sha1.h"
>> #include "fec.h"
>>
>> +struct rb_root node_oids_root = RB_ROOT;
>> +
>> static struct sd_option vdi_options[] = {
>> {'P', "prealloc", false, "preallocate all the data objects"},
>> {'i', "index", true, "specify the index of data objects"},
>> @@ -866,6 +868,127 @@ out:
>> return ret;
>> }
>>
>> +#define OIDS_INIT_LENGTH 1024
>> +
>> +static void store_oid(uint64_t oid, int copies)
>> +{
>> + const struct sd_vnode *vnodes[SD_MAX_COPIES];
>> + struct node_oids_entry *entry;
>> +
>> + oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
>> + for (int i = 0; i < copies; i++) {
>> + struct node_oids_entry key = {
>> + .node = (struct sd_node *) vnodes[i]->node
>> + };
>> + entry = rb_search(&node_oids_root, &key,
>> + rb, node_oids_entry_cmp);
>> + if (!entry)
>> + panic("rb_search() failure.");
>> +
>> + if (entry->pos >= entry->last) {
>> + entry->last *= 2;
>> + entry->oids = xrealloc(entry->oids,
>> + sizeof(uint64_t) * entry->last);
>> + }
>> + entry->oids[entry->pos] = oid;
>> + entry->pos++;
>> + }
>> +}
>> +
>> +static void init_node_oids_tree(const struct sd_inode *inode)
>> +{
>> + uint32_t max_idx, vid;
>> + uint64_t oid;
>> + struct sd_node *node;
>> + struct node_oids_entry *entry;
>> + int nr_copies = min((int)inode->nr_copies, sd_zones_nr);
>> +
>> + rb_for_each_entry(node, &sd_nroot, rb) {
>> + entry = xmalloc(sizeof(*entry));
>> + entry->node = node;
>> + entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
>> + entry->last = OIDS_INIT_LENGTH;
>> + entry->pos = 0;
>> + rb_insert(&node_oids_root, entry, rb, node_oids_entry_cmp);
>> + }
>> +
>> + store_oid(vid_to_vdi_oid(inode->vdi_id), nr_copies);
>> + max_idx = count_data_objs(inode);
>> + for (uint32_t idx = 0; idx < max_idx; idx++) {
>> + vid = sd_inode_get_vid(inode, idx);
>> + if (vid == 0)
>> + continue;
>> + oid = vid_to_data_oid(vid, idx);
>> + store_oid(oid, nr_copies);
>> + }
>> +}
>> +
>> +static void destroy_node_oids_tree(void)
>> +{
>> + struct node_oids_entry *entry;
>> +
>> + rb_for_each_entry(entry, &node_oids_root, rb)
>> + free(entry->oids);
>> + rb_destroy(&node_oids_root, struct sd_node, rb);
>> +}
>> +
>> +static int do_vdi_object_exist(const struct sd_inode *inode)
>> +{
>> + int total = 0;
>> + struct node_oids_entry *entry;
>> + struct sd_req hdr;
>> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
>> +
>> + init_node_oids_tree(inode);
>> +
>> + rb_for_each_entry(entry, &node_oids_root, rb) {
>> + sd_init_req(&hdr, SD_OP_OIDS_EXIST);
>> + hdr.data_length = sizeof(uint64_t) * entry->pos;
>> + hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ;
>> + int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
>> + if (ret < 0)
>> + panic("dog_exec_req() failure.");
>> +
>> + int n = rsp->data_length / sizeof(uint64_t);
>> + total += n;
>> + for (int i = 0; i < n; i++)
>> + printf("[%s] oid %016"PRIx64" is missing.\n",
>> + addr_to_str(entry->node->nid.addr,
>> + entry->node->nid.port),
>> + entry->oids[i]);
>> + }
>> +
>> + destroy_node_oids_tree();
>> + return total;
>> +}
>> +
>> +static int vdi_object_exist(int argc, char **argv)
>> +{
>> + const char *vdiname = argv[optind++];
>> + int total, ret;
>> + struct sd_inode *inode = xmalloc(sizeof(*inode));
>> +
>> + ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id,
>> + vdi_cmd_data.snapshot_tag, NULL, inode,
>> + SD_INODE_SIZE);
>> + if (ret != EXIT_SUCCESS) {
>> + sd_err("FATAL: no inode objects");
>> + goto out;
>> + }
>> +
>> + total = do_vdi_object_exist(inode);
>> + if (total == 0) {
>> + printf("%s is fine, no object is missing.\n", vdiname);
>> + ret = EXIT_SUCCESS;
>> + } else {
>> + printf("%s lost %d object(s).\n", vdiname, total);
>> + ret = EXIT_FAILURE;
>> + }
>> +out:
>> + free(inode);
>> + return ret;
>> +}
>> +
>> static int do_track_object(uint64_t oid, uint8_t nr_copies)
>> {
>> int i, j, ret;
>> @@ -2323,6 +2446,8 @@ static struct subcommand vdi_object_cmd[] = {
>> NULL, CMD_NEED_ARG, vdi_object_map},
>> {"dump-inode", NULL, NULL, "dump inode information",
>> NULL, CMD_NEED_ARG, vdi_object_dump_inode},
>> + {"exist", NULL, NULL, "show which objects are missing",
>> + NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_object_exist},
>> {NULL},
>> };
>>
>> diff --git a/include/internal_proto.h b/include/internal_proto.h
>> index 73ed581..53b5b00 100644
>> --- a/include/internal_proto.h
>> +++ b/include/internal_proto.h
>> @@ -100,6 +100,7 @@
>> #define SD_OP_NFS_DELETE 0xBC
>> #define SD_OP_EXIST 0xBD
>> #define SD_OP_CLUSTER_INFO 0xBE
>> +#define SD_OP_OIDS_EXIST 0xBF
>>
>> /* internal flags for hdr.flags, must be above 0x80 */
>> #define SD_FLAG_CMD_RECOVERY 0x0080
>> @@ -173,6 +174,14 @@ struct sd_node {
>> #endif
>> };
>>
>> +struct node_oids_entry {
>> + struct rb_node rb;
>> + struct sd_node *node;
>> + uint64_t *oids;
>> + int last;
>> + int pos;
>> +};
>> +
>> /*
>> * A joining sheep multicasts the local cluster info. Then, the existing nodes
>> * reply the latest cluster info which is unique among all of the nodes.
>> diff --git a/include/sheep.h b/include/sheep.h
>> index 785883e..9b56dd8 100644
>> --- a/include/sheep.h
>> +++ b/include/sheep.h
>> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
>> return node_id_cmp(&node1->nid, &node2->nid);
>> }
>>
>> +static inline int node_oids_entry_cmp(const struct node_oids_entry *entry1,
>> + const struct node_oids_entry *entry2)
>> +{
>> + return node_cmp(entry1->node, entry2->node);
>> +}
>> +
>> static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
>> {
>> return node_cmp(a, b) == 0;
>> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
>> index 9361bad..c6e21b4 100644
>> --- a/include/sheepdog_proto.h
>> +++ b/include/sheepdog_proto.h
>> @@ -45,6 +45,7 @@
>> #define SD_FLAG_CMD_COW 0x02
>> #define SD_FLAG_CMD_CACHE 0x04
>> #define SD_FLAG_CMD_DIRECT 0x08 /* don't use object cache */
>> +#define SD_FLAG_CMD_READ 0x10
> why you need this SD_FLAG_CMD_READ?
Is there a flag that stands for both write and read? The new command needs one
because it sends the array of oids to be checked and receives the array of lost oids.
Therefore, I modified the function exec_req() as below.
--- a/lib/net.c
+++ b/lib/net.c
@@ -333,7 +333,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data,
if (hdr->flags & SD_FLAG_CMD_WRITE) {
wlen = hdr->data_length;
- rlen = 0;
+ rlen = (hdr->flags & SD_FLAG_CMD_READ) ? hdr->data_length : 0;
} else {
wlen = 0;
rlen = hdr->data_length;
>
> Thanks
> Yuan
More information about the sheepdog
mailing list