[sheepdog] [PATCH v4] add vdi check -e to quickly check which objects are lost
Ruoyu
liangry at ucweb.com
Mon Jul 28 12:11:42 CEST 2014
Sometimes we want to quickly check whether some of the vdi objects
or data objects have been lost due to an unexpected issue.
Although vdi check can do this, it takes a lot of time because of
the many client-server round trips, and the chance of it triggering
the automatic data repair is quite low, since writes are strongly
consistent.
Therefore, the new option -e (--exist) only checks whether all the
objects related to the vdi exist. It is fast because it submits the
batched object ids only once per node. I think this is enough for
the situation.
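To make the batching idea concrete, here is a minimal, stand-alone
sketch of the per-node flow; it is not the patch code itself (that is
do_vdi_check_exist() in the dog/vdi.c hunk below), and query_node()
and struct oid_bucket are illustrative placeholders for dog_exec_req()
and the rb-tree of oid_entry structures.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    struct oid_bucket {
        const char *node_addr;  /* e.g. "127.0.0.1:7001" */
        uint64_t *oids;         /* oids this node is expected to hold */
        int nr_oids;
    };

    /* hypothetical: sends the whole batch with SD_OP_OIDS_EXIST and
     * rewrites it in place so only the missing oids remain; returns
     * how many are missing. */
    static int query_node(struct oid_bucket *b)
    {
        (void)b;
        return 0;
    }

    static int report_missing(struct oid_bucket *buckets, int nr_nodes)
    {
        int total = 0;

        for (int n = 0; n < nr_nodes; n++) {
            int missing = query_node(&buckets[n]);  /* one round trip */

            for (int i = 0; i < missing; i++)
                printf("[%s] oid %016"PRIx64" is missing.\n",
                       buckets[n].node_addr, buckets[n].oids[i]);
            total += missing;
        }
        return total;
    }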
Usage: dog vdi check -e <vdiname>
Example:
$ dog vdi check -e test
test is fine, no object is missing.
$ dog vdi check -e ucweb
[127.0.0.1:7001] oid 80b8071d00000000 is missing.
[127.0.0.1:7001] oid 00b8071d000000ee is missing.
ucweb lost 2 object(s).
v4 is rebased on the latest master and
1. the help message is updated to tell the user that vdi check -e
   neither compares nor repairs objects
2. the function do_obj_check is renamed to do_vdi_check_exist
3. a new command flag, SD_FLAG_CMD_FILTER, is introduced because
   neither plain read nor plain write is appropriate (a sketch of
   its filter semantics follows this list)
4. the value of SD_FLAG_CMD_FILTER is changed because the original
   one is already taken
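As a rough illustration of that filter semantics (not the exact server
code, which is local_oids_exist() in the sheep/ops.c hunk below): the
request payload is an array of oids, and the reply reuses the same
buffer, compacted down to the oids that are missing. The oid_exists()
predicate here is a hypothetical stand-in for sd_store->exist().

    #include <stdbool.h>
    #include <stdint.h>

    static bool oid_exists(uint64_t oid)
    {
        (void)oid;
        return true;  /* placeholder: pretend every object is present */
    }

    /* Compact the missing oids to the front of the same buffer; the
     * reply payload is then nr_missing * sizeof(uint64_t) bytes of
     * that buffer. */
    static int filter_missing_oids(uint64_t *oids, int nr_oids)
    {
        int j = 0;

        for (int i = 0; i < nr_oids; i++)
            if (!oid_exists(oids[i]))
                oids[j++] = oids[i];

        return j;
    }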
Signed-off-by: Ruoyu <liangry at ucweb.com>
---
dog/vdi.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++-
include/internal_proto.h | 9 ++++
include/sheep.h | 6 +++
include/sheepdog_proto.h | 1 +
lib/net.c | 3 ++
sheep/ops.c | 31 +++++++++++++
6 files changed, 163 insertions(+), 2 deletions(-)
diff --git a/dog/vdi.c b/dog/vdi.c
index 97ae63c..93ae763 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -21,6 +21,8 @@
#include "sha1.h"
#include "fec.h"
+struct rb_root oid_tree = RB_ROOT;
+
static struct sd_option vdi_options[] = {
{'P', "prealloc", false, "preallocate all the data objects"},
{'n', "no-share", false, "share nothing with its parent"},
@@ -34,6 +36,8 @@ static struct sd_option vdi_options[] = {
{'f', "force", false, "do operation forcibly"},
{'y', "hyper", false, "create a hyper volume"},
{'o', "oid", true, "specify the object id of the tracking object"},
+ {'e', "exist", false, "only check objects exist or not,\n"
+ " neither comparing nor repairing"},
{ 0, NULL, false, NULL },
};
@@ -53,6 +57,7 @@ static struct vdi_cmd_data {
uint8_t store_policy;
uint64_t oid;
bool no_share;
+ bool exist;
} vdi_cmd_data = { ~0, };
struct get_vdi_info {
@@ -985,6 +990,106 @@ out:
return ret;
}
+#define OIDS_INIT_LENGTH 1024
+
+static void save_oid(uint64_t oid, int copies)
+{
+ const struct sd_vnode *vnodes[SD_MAX_COPIES];
+ struct oid_entry *entry;
+
+ oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
+ for (int i = 0; i < copies; i++) {
+ struct oid_entry key = {
+ .node = (struct sd_node *) vnodes[i]->node
+ };
+ entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp);
+ if (!entry)
+ panic("rb_search() failure.");
+
+ if (entry->last >= entry->end) {
+ entry->end *= 2;
+ entry->oids = xrealloc(entry->oids,
+ sizeof(uint64_t) * entry->end);
+ }
+ entry->oids[entry->last] = oid;
+ entry->last++;
+ }
+}
+
+static void build_oid_tree(const struct sd_inode *inode)
+{
+ uint32_t max_idx, vid;
+ uint64_t oid;
+ struct sd_node *node;
+ struct oid_entry *entry;
+ int copies = min((int)inode->nr_copies, sd_zones_nr);
+
+ rb_for_each_entry(node, &sd_nroot, rb) {
+ entry = xmalloc(sizeof(*entry));
+ entry->node = node;
+ entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
+ entry->end = OIDS_INIT_LENGTH;
+ entry->last = 0;
+ rb_insert(&oid_tree, entry, rb, oid_entry_cmp);
+ }
+
+ save_oid(vid_to_vdi_oid(inode->vdi_id), copies);
+ max_idx = count_data_objs(inode);
+ for (uint32_t idx = 0; idx < max_idx; idx++) {
+ vid = sd_inode_get_vid(inode, idx);
+ if (vid == 0)
+ continue;
+ oid = vid_to_data_oid(vid, idx);
+ save_oid(oid, copies);
+ }
+}
+
+static void destroy_oid_tree(void)
+{
+ struct oid_entry *entry;
+
+ rb_for_each_entry(entry, &oid_tree, rb)
+ free(entry->oids);
+ rb_destroy(&oid_tree, struct oid_entry, rb);
+}
+
+static int do_vdi_check_exist(const struct sd_inode *inode)
+{
+ int total = 0;
+ struct oid_entry *entry;
+ struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+
+ build_oid_tree(inode);
+
+ rb_for_each_entry(entry, &oid_tree, rb) {
+ sd_init_req(&hdr, SD_OP_OIDS_EXIST);
+ hdr.data_length = sizeof(uint64_t) * entry->last;
+ hdr.flags = SD_FLAG_CMD_FILTER;
+ int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
+ if (ret < 0)
+ panic("dog_exec_req() failure.");
+
+ int n = rsp->data_length / sizeof(uint64_t);
+ total += n;
+ for (int i = 0; i < n; i++)
+ printf("[%s] oid %016"PRIx64" is missing.\n",
+ addr_to_str(entry->node->nid.addr,
+ entry->node->nid.port),
+ entry->oids[i]);
+ }
+
+ destroy_oid_tree();
+
+ if (total == 0) {
+ printf("%s is fine, no object is missing.\n", inode->name);
+ return EXIT_SUCCESS;
+ } else {
+ printf("%s lost %d object(s).\n", inode->name, total);
+ return EXIT_FAILURE;
+ }
+}
+
static int do_track_object(uint64_t oid, uint8_t nr_copies)
{
int i, j, ret;
@@ -1873,7 +1978,10 @@ static int vdi_check(int argc, char **argv)
goto out;
}
- ret = do_vdi_check(inode);
+ if (vdi_cmd_data.exist)
+ ret = do_vdi_check_exist(inode);
+ else
+ ret = do_vdi_check(inode);
out:
free(inode);
return ret;
@@ -2591,7 +2699,7 @@ static int vdi_alter_copy(int argc, char **argv)
}
static struct subcommand vdi_cmd[] = {
- {"check", "<vdiname>", "sapht", "check and repair image's consistency",
+ {"check", "<vdiname>", "seapht", "check and repair image's consistency",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_check, vdi_options},
{"create", "<vdiname> <size>", "Pycaphrvt", "create an image",
@@ -2735,6 +2843,9 @@ static int vdi_parser(int ch, const char *opt)
exit(EXIT_FAILURE);
}
break;
+ case 'e':
+ vdi_cmd_data.exist = true;
+ break;
}
return 0;
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 2affc42..37afb46 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -107,6 +107,7 @@
#define SD_OP_PREVENT_INODE_UPDATE 0xC3
#define SD_OP_ALLOW_INODE_UPDATE 0xC4
#define SD_OP_REPAIR_REPLICA 0xC5
+#define SD_OP_OIDS_EXIST 0xC6
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
@@ -180,6 +181,14 @@ struct sd_node {
#endif
};
+struct oid_entry {
+ struct rb_node rb;
+ struct sd_node *node; /* key */
+ uint64_t *oids; /* object id array */
+ int end; /* idx to the end of the allocated oid array */
+ int last; /* idx to the last element of the oid array */
+};
+
/*
* A joining sheep multicasts the local cluster info. Then, the existing nodes
* reply the latest cluster info which is unique among all of the nodes.
diff --git a/include/sheep.h b/include/sheep.h
index e062372..5b136a8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
return node_id_cmp(&node1->nid, &node2->nid);
}
+static inline int oid_entry_cmp(const struct oid_entry *entry1,
+ const struct oid_entry *entry2)
+{
+ return node_cmp(entry1->node, entry2->node);
+}
+
static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
{
return node_cmp(a, b) == 0;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index d6a8d35..b4e1e13 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -50,6 +50,7 @@
#define SD_FLAG_CMD_COW 0x02
#define SD_FLAG_CMD_CACHE 0x04
#define SD_FLAG_CMD_DIRECT 0x08 /* don't use object cache */
+#define SD_FLAG_CMD_FILTER 0x11 /* write & read, output is subset of input */
/* flags above 0x80 are sheepdog-internal */
#define SD_RES_SUCCESS 0x00 /* Success */
diff --git a/lib/net.c b/lib/net.c
index b32e022..552e945 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -334,6 +334,9 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data,
if (hdr->flags & SD_FLAG_CMD_WRITE) {
wlen = hdr->data_length;
rlen = 0;
+ } else if (hdr->flags & SD_FLAG_CMD_FILTER) {
+ wlen = hdr->data_length;
+ rlen = hdr->data_length;
} else {
wlen = 0;
rlen = hdr->data_length;
diff --git a/sheep/ops.c b/sheep/ops.c
index dc10f0f..3d20c7d 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -1056,6 +1056,30 @@ static int local_oid_exist(struct request *req)
return SD_RES_NO_OBJ;
}
+static int local_oids_exist(const struct sd_req *req, struct sd_rsp *rsp,
+ void *data)
+{
+ struct request *r = container_of(req, struct request, rq);
+ uint64_t *oids = (uint64_t *) data;
+ uint8_t ec_index;
+ int i, j, n = req->data_length / sizeof(uint64_t);
+
+ for (i = 0, j = 0; i < n; i++) {
+ ec_index = local_ec_index(r->vinfo, oids[i]);
+ if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES)
+ oids[j++] = oids[i];
+ else if (!sd_store->exist(oids[i], ec_index))
+ oids[j++] = oids[i];
+ }
+
+ if (j > 0) {
+ rsp->data_length = sizeof(uint64_t) * j;
+ return SD_RES_NO_OBJ;
+ }
+
+ return SD_RES_SUCCESS;
+}
+
static int local_cluster_info(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -1594,6 +1618,13 @@ static struct sd_op_template sd_ops[] = {
.process_work = local_oid_exist,
},
+ [SD_OP_OIDS_EXIST] = {
+ .name = "OIDS_EXIST",
+ .type = SD_OP_TYPE_LOCAL,
+ .force = true,
+ .process_main = local_oids_exist,
+ },
+
[SD_OP_CLUSTER_INFO] = {
.name = "CLUSTER INFO",
.type = SD_OP_TYPE_LOCAL,
--
1.8.3.2