[Sheepdog] [PATCH 3/3] collie: add an operation to track an object's locations at each epoch

Li Wenpeng levin108 at gmail.com
Fri Apr 13 10:20:02 CEST 2012


From: levin li <xingke.lwp at taobao.com>

When I was debugging the recovery issue (commit: 470533b), I found it
convenient to have a tool that tracks an object's locations at each epoch,
to check whether the object is placed on the right nodes, so I added this
operation to collie for such cases.

usage:
$ collie/collie vdi track debian -i 3

output:

obj b1f0b000000003 locations at epoch 1, copies = 3
---------------------------------------------------
127.0.0.1:7004
127.0.0.1:7000
127.0.0.1:7001

obj b1f0b000000003 locations at epoch 2, copies = 3
---------------------------------------------------
127.0.0.1:7007
127.0.0.1:7004
127.0.0.1:7000

obj b1f0b000000003 locations at epoch 3, copies = 3
---------------------------------------------------
127.0.0.1:7007
127.0.0.1:7008
127.0.0.1:7004

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 collie/vdi.c       |  128 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/sheep.h    |    1 +
 sheep/ops.c        |    1 +
 sheep/sheep_priv.h |    1 +
 sheep/store.c      |    2 +-
 5 files changed, 132 insertions(+), 1 deletions(-)

diff --git a/collie/vdi.c b/collie/vdi.c
index 9675774..ac046b8 100644
--- a/collie/vdi.c
+++ b/collie/vdi.c
@@ -32,6 +32,9 @@ struct get_vdi_info {
 	uint32_t snapid;
 };
 
+struct sd_node latest_node_list[SD_MAX_NODES];
+int nr_latest_node_list;
+
 static int parse_option_size(const char *value, uint64_t *ret)
 {
 	char *postfix;
@@ -791,6 +794,129 @@ static int vdi_object(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+static int print_obj_epoch(uint64_t oid)
+{
+	int i, j, fd, ret, idx;
+	struct sd_vdi_req hdr;
+	struct sd_vdi_rsp *rsp = (struct sd_vdi_rsp *)&hdr;
+	unsigned rlen, wlen;
+	struct sd_vnode vnodes[SD_MAX_VNODES];
+	struct epoch_log *logs;
+	int vnodes_nr, nr_logs, log_length;
+	char host[128];
+
+	log_length = node_list_version * sizeof(struct epoch_log);
+again:
+	logs = malloc(log_length);
+	if (!logs) {
+		if (log_length < 10) {
+			fprintf(stderr, "No memory to allocate.\n");
+			return EXIT_SYSFAIL;
+		}
+		log_length /= 2;
+		goto again;
+	}
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		goto error;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.opcode = SD_OP_STAT_CLUSTER;
+	hdr.epoch = node_list_version;
+	hdr.data_length = log_length;
+
+	rlen = hdr.data_length;
+	wlen = 0;
+	ret = exec_req(fd, (struct sd_req *)&hdr, logs, &wlen, &rlen);
+	close(fd);
+
+	if (ret != 0)
+		goto error;
+
+	if (rsp->result != SD_RES_SUCCESS)
+		printf("%s\n", sd_strerror(rsp->result));
+
+	nr_logs = rsp->data_length / sizeof(struct epoch_log);
+	for (i = nr_logs - 1; i >= 0; i--) {
+		vnodes_nr = nodes_to_vnodes(logs[i].nodes, logs[i].nr_nodes, vnodes);
+		printf("\nobj %"PRIx64" locations at epoch %d, copies = %d\n",
+				oid, logs[i].epoch, logs[i].nr_copies);
+		printf("---------------------------------------------------\n");
+		for (j = 0; j < logs[i].nr_copies; j++) {
+			idx = obj_to_sheep(vnodes, vnodes_nr, oid, j);
+			addr_to_str(host, sizeof(host), vnodes[idx].addr,
+						vnodes[idx].port);
+			printf("%s\n", host);
+		}
+	}
+
+	free(logs);
+	return EXIT_SUCCESS;
+error:
+	free(logs);
+	return EXIT_SYSFAIL;
+}
+
+static int vdi_track(int argc, char **argv)
+{
+	char *vdiname = argv[optind];
+	unsigned idx = vdi_cmd_data.index;
+	int ret;
+	struct get_vdi_info info;
+	uint32_t vid;
+
+	memset(&info, 0, sizeof(info));
+	info.name = vdiname;
+	info.tag = vdi_cmd_data.snapshot_tag;
+	info.vid = 0;
+	info.snapid = vdi_cmd_data.snapshot_id;
+
+	ret = parse_vdi(get_oid, SD_INODE_HEADER_SIZE, &info);
+
+	vid = info.vid;
+	if (vid == 0) {
+		fprintf(stderr, "VDI not found\n");
+		return EXIT_MISSING;
+	}
+
+	if (idx == ~0) {
+		printf("Tracking the inode object 0x%" PRIx32 " with %d nodes\n",
+		       vid, nr_nodes);
+		print_obj_epoch(vid_to_vdi_oid(vid));
+	} else {
+		struct get_data_oid_info oid_info;
+
+		oid_info.success = 0;
+		oid_info.idx = idx;
+
+		if (idx >= MAX_DATA_OBJS) {
+			printf("The offset is too large!\n");
+			exit(EXIT_FAILURE);
+		}
+
+		parse_objs(vid_to_vdi_oid(vid), get_data_oid,
+					&oid_info, SD_DATA_OBJ_SIZE);
+
+		if (oid_info.success) {
+			if (oid_info.data_oid) {
+				printf("Tracking the object 0x%" PRIx64
+				       " (the inode vid 0x%" PRIx32 " idx %u)"
+					   " with %d nodes\n",
+				       oid_info.data_oid, vid, idx, nr_nodes);
+				print_obj_epoch(oid_info.data_oid);
+
+			} else
+				printf("The inode object 0x%" PRIx32 " idx %u is not allocated\n",
+				       vid, idx);
+		} else
+			fprintf(stderr, "Failed to read the inode object 0x%"PRIx32"\n", vid);
+	}
+
+	return EXIT_SUCCESS;
+}
+
 static int find_vdi_attr_oid(char *vdiname, char *tag, uint32_t snapid,
 			     char *key, void *value, unsigned int value_len,
 			     uint32_t *vid, uint64_t *oid, unsigned int *nr_copies,
@@ -1205,6 +1331,8 @@ static struct subcommand vdi_cmd[] = {
 	 SUBCMD_FLAG_NEED_NODELIST, vdi_graph},
 	{"object", "<vdiname>", "isaph", "show object information in the image",
 	 SUBCMD_FLAG_NEED_NODELIST|SUBCMD_FLAG_NEED_THIRD_ARG, vdi_object},
+	{"track", "<vdiname>", "isaph", "show the object epoch trace in the image",
+	 SUBCMD_FLAG_NEED_NODELIST|SUBCMD_FLAG_NEED_THIRD_ARG, vdi_track},
 	{"setattr", "<vdiname> <key> [value]", "dxaph", "set a VDI attribute",
 	 SUBCMD_FLAG_NEED_NODELIST|SUBCMD_FLAG_NEED_THIRD_ARG, vdi_setattr},
 	{"getattr", "<vdiname> <key>", "aph", "get a VDI attribute",
diff --git a/include/sheep.h b/include/sheep.h
index d010fdf..fc2ac58 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -164,6 +164,7 @@ struct epoch_log {
 	uint64_t time;
 	uint32_t epoch;
 	uint32_t nr_nodes;
+	uint32_t nr_copies;
 	struct sd_node nodes[SD_MAX_NODES];
 };
 
diff --git a/sheep/ops.c b/sheep/ops.c
index ee6dee1..99bde61 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -312,6 +312,7 @@ static int local_stat_cluster(const struct sd_req *req, struct sd_rsp *rsp,
 			log->nr_nodes = epoch_log_read_remote(epoch,
 							      (char *)log->nodes,
 							      sizeof(log->nodes));
+		log->nr_copies = get_max_copies(log->nodes, log->nr_nodes);
 
 		rsp->data_length += sizeof(*log);
 		log->nr_nodes /= sizeof(log->nodes[0]);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index a9e8440..77b1586 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -273,6 +273,7 @@ int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
 
 int store_file_write(void *buffer, size_t len);
 void *store_file_read(void);
+int get_max_copies(struct sd_node *entries, int nr);
 
 #define NR_GW_WORKER_THREAD 4
 #define NR_IO_WORKER_THREAD 4
diff --git a/sheep/store.c b/sheep/store.c
index 739862c..216be97 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1118,7 +1118,7 @@ uint64_t get_cluster_ctime(void)
 	return ct;
 }
 
-static int get_max_copies(struct sd_node *entries, int nr)
+int get_max_copies(struct sd_node *entries, int nr)
 {
 	int i, j;
 	unsigned int nr_zones = 0;
-- 
1.7.1




More information about the sheepdog mailing list