[sheepdog] [PATCH] add dog vdi object exist command to check which objects are lost.

Ruoyu liangry at ucweb.com
Thu May 22 13:56:06 CEST 2014


Sometimes we want to quickly check whether some of the vdi objects
or data objects have been lost due to an unexpected issue.

Although dog vdi check will do, it takes a long time because of the
many client-server round trips. Moreover, the probability that it
actually triggers automatic data repair is quite low, since writes
are strongly consistent.

Therefore, the new command checks whether all the objects belonging
to the vdi exist. It is fast because it submits the batched object
ids only once per node. I think this is sufficient for this
situation.

Usage: dog vdi object exist <vdiname>

Example:
$ dog vdi object exist test
test is fine, no object is missing.

$ dog vdi object exist ucweb
[127.0.0.1:7001] oid 80b8071d00000000 is missing.
[127.0.0.1:7001] oid 00b8071d000000ee is missing.
ucweb lost 2 object(s).

Signed-off-by: Ruoyu <liangry at ucweb.com>
---
 dog/vdi.c                | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 include/internal_proto.h |   9 ++++
 include/sheep.h          |   6 +++
 include/sheepdog_proto.h |   1 +
 lib/net.c                |   2 +-
 sheep/ops.c              |  31 ++++++++++++
 6 files changed, 173 insertions(+), 1 deletion(-)

diff --git a/dog/vdi.c b/dog/vdi.c
index 9c34bfb..defb5bd 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -21,6 +21,8 @@
 #include "sha1.h"
 #include "fec.h"
 
+struct rb_root node_oids_root = RB_ROOT;
+
 static struct sd_option vdi_options[] = {
 	{'P', "prealloc", false, "preallocate all the data objects"},
 	{'i', "index", true, "specify the index of data objects"},
@@ -866,6 +868,127 @@ out:
 	return ret;
 }
 
+#define OIDS_INIT_LENGTH 1024
+
+static void store_oid(uint64_t oid, int copies)
+{
+	const struct sd_vnode *vnodes[SD_MAX_COPIES];
+	struct node_oids_entry *entry;
+
+	oid_to_vnodes(oid, &sd_vroot, copies, vnodes);
+	for (int i = 0; i < copies; i++) {
+		struct node_oids_entry key = {
+			.node = (struct sd_node *) vnodes[i]->node
+		};
+		entry = rb_search(&node_oids_root, &key,
+				rb, node_oids_entry_cmp);
+		if (!entry)
+			panic("rb_search() failure.");
+
+		if (entry->pos >= entry->last) {
+			entry->last *= 2;
+			entry->oids = xrealloc(entry->oids,
+					sizeof(uint64_t) * entry->last);
+		}
+		entry->oids[entry->pos] = oid;
+		entry->pos++;
+	}
+}
+
+static void init_node_oids_tree(const struct sd_inode *inode)
+{
+	uint32_t max_idx, vid;
+	uint64_t oid;
+	struct sd_node *node;
+	struct node_oids_entry *entry;
+	int nr_copies = min((int)inode->nr_copies, sd_zones_nr);
+
+	rb_for_each_entry(node, &sd_nroot, rb) {
+		entry = xmalloc(sizeof(*entry));
+		entry->node = node;
+		entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH);
+		entry->last = OIDS_INIT_LENGTH;
+		entry->pos = 0;
+		rb_insert(&node_oids_root, entry, rb, node_oids_entry_cmp);
+	}
+
+	store_oid(vid_to_vdi_oid(inode->vdi_id), nr_copies);
+	max_idx = count_data_objs(inode);
+	for (uint32_t idx = 0; idx < max_idx; idx++) {
+		vid = sd_inode_get_vid(inode, idx);
+		if (vid == 0)
+			continue;
+		oid = vid_to_data_oid(vid, idx);
+		store_oid(oid, nr_copies);
+	}
+}
+
+static void destroy_node_oids_tree(void)
+{
+	struct node_oids_entry *entry;
+
+	rb_for_each_entry(entry, &node_oids_root, rb)
+		free(entry->oids);	/* per-node oid array */
+	rb_destroy(&node_oids_root, struct node_oids_entry, rb);
+}
+
+static int do_vdi_object_exist(const struct sd_inode *inode)
+{
+	int total = 0;
+	struct node_oids_entry *entry;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+
+	init_node_oids_tree(inode);
+
+	rb_for_each_entry(entry, &node_oids_root, rb) {
+		sd_init_req(&hdr, SD_OP_OIDS_EXIST);
+		hdr.data_length = sizeof(uint64_t) * entry->pos;
+		hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ;
+		int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids);
+		if (ret < 0)
+			panic("dog_exec_req() failure.");
+
+		int n = rsp->data_length / sizeof(uint64_t);
+		total += n;
+		for (int i = 0; i < n; i++)
+			printf("[%s] oid %016"PRIx64" is missing.\n",
+					addr_to_str(entry->node->nid.addr,
+							entry->node->nid.port),
+					entry->oids[i]);
+	}
+
+	destroy_node_oids_tree();
+	return total;
+}
+
+static int vdi_object_exist(int argc, char **argv)
+{
+	const char *vdiname = argv[optind++];
+	int total, ret;
+	struct sd_inode *inode = xmalloc(sizeof(*inode));
+
+	ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id,
+			   vdi_cmd_data.snapshot_tag, NULL, inode,
+			   SD_INODE_SIZE);
+	if (ret != EXIT_SUCCESS) {
+		sd_err("FATAL: no inode objects");
+		goto out;
+	}
+
+	total = do_vdi_object_exist(inode);
+	if (total == 0) {
+		printf("%s is fine, no object is missing.\n", vdiname);
+		ret = EXIT_SUCCESS;
+	} else {
+		printf("%s lost %d object(s).\n", vdiname, total);
+		ret = EXIT_FAILURE;
+	}
+out:
+	free(inode);
+	return ret;
+}
+
 static int do_track_object(uint64_t oid, uint8_t nr_copies)
 {
 	int i, j, ret;
@@ -2323,6 +2446,8 @@ static struct subcommand vdi_object_cmd[] = {
 	 NULL, CMD_NEED_ARG, vdi_object_map},
 	{"dump-inode", NULL, NULL, "dump inode information",
 	 NULL, CMD_NEED_ARG, vdi_object_dump_inode},
+	{"exist", NULL, NULL, "show which objects are missing",
+	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_object_exist},
 	{NULL},
 };
 
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 73ed581..53b5b00 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -100,6 +100,7 @@
 #define SD_OP_NFS_DELETE	0xBC
 #define SD_OP_EXIST	0xBD
 #define SD_OP_CLUSTER_INFO	0xBE
+#define SD_OP_OIDS_EXIST	0xBF
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
@@ -173,6 +174,14 @@ struct sd_node {
 #endif
 };
 
+struct node_oids_entry {
+	struct rb_node rb;
+	struct sd_node *node;
+	uint64_t *oids;
+	int last;
+	int pos;
+};
+
 /*
  * A joining sheep multicasts the local cluster info.  Then, the existing nodes
  * reply the latest cluster info which is unique among all of the nodes.
diff --git a/include/sheep.h b/include/sheep.h
index 785883e..9b56dd8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1,
 	return node_id_cmp(&node1->nid, &node2->nid);
 }
 
+static inline int node_oids_entry_cmp(const struct node_oids_entry *entry1,
+			   const struct node_oids_entry *entry2)
+{
+	return node_cmp(entry1->node, entry2->node);
+}
+
 static inline bool node_eq(const struct sd_node *a, const struct sd_node *b)
 {
 	return node_cmp(a, b) == 0;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 9361bad..c6e21b4 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -45,6 +45,7 @@
 #define SD_FLAG_CMD_COW      0x02
 #define SD_FLAG_CMD_CACHE    0x04
 #define SD_FLAG_CMD_DIRECT   0x08 /* don't use object cache */
+#define SD_FLAG_CMD_READ     0x10
 /* flags above 0x80 are sheepdog-internal */
 
 #define SD_RES_SUCCESS       0x00 /* Success */
diff --git a/lib/net.c b/lib/net.c
index b32e022..c2d86cb 100644
--- a/lib/net.c
+++ b/lib/net.c
@@ -333,7 +333,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data,
 
 	if (hdr->flags & SD_FLAG_CMD_WRITE) {
 		wlen = hdr->data_length;
-		rlen = 0;
+		rlen = (hdr->flags & SD_FLAG_CMD_READ) ? hdr->data_length : 0;
 	} else {
 		wlen = 0;
 		rlen = hdr->data_length;
diff --git a/sheep/ops.c b/sheep/ops.c
index 22bb8dc..2fd5ee8 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -1029,6 +1029,30 @@ static int local_oid_exist(struct request *req)
 	return SD_RES_NO_OBJ;
 }
 
+static int local_oids_exist(const struct sd_req *req, struct sd_rsp *rsp,
+			      void *data)
+{
+	struct request *r = container_of(req, struct request, rq);
+	uint64_t *oids = (uint64_t *) data;
+	uint8_t ec_index;
+	int i, j, n = req->data_length / sizeof(uint64_t);
+
+	for (i = 0, j = 0; i < n; i++) {
+		ec_index = local_ec_index(r->vinfo, oids[i]);
+		if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES)
+			oids[j++] = oids[i];
+		else if (!sd_store->exist(oids[i], ec_index))
+			oids[j++] = oids[i];
+	}
+
+	if (j > 0) {
+		rsp->data_length = sizeof(uint64_t) * j;
+		return SD_RES_NO_OBJ;
+	}
+
+	return SD_RES_SUCCESS;
+}
+
 static int local_cluster_info(const struct sd_req *req, struct sd_rsp *rsp,
 			      void *data)
 {
@@ -1365,6 +1389,13 @@ static struct sd_op_template sd_ops[] = {
 		.process_work = local_oid_exist,
 	},
 
+	[SD_OP_OIDS_EXIST] =  {
+		.name = "OIDS_EXIST",
+		.type = SD_OP_TYPE_LOCAL,
+		.force = true,
+		.process_main = local_oids_exist,
+	},
+
 	[SD_OP_CLUSTER_INFO] = {
 		.name = "CLUSTER INFO",
 		.type = SD_OP_TYPE_LOCAL,
-- 
1.8.3.2





More information about the sheepdog mailing list