[sheepdog] [PATCH v5 2/6] sheep: add disk information into sd_node

Robin Dong robin.k.dong at gmail.com
Wed May 21 05:41:55 CEST 2014


From: Robin Dong <sanbai at taobao.com>

Add disk information to sd_node so that it is sent to the
cluster's shared region (corosync/zookeeper, etc.) and is
visible to all nodes in the cluster.

The new size of sd_node is 592 bytes (80 + 16 * DISK_MAX) and the maximum
size supported by zookeeper is 983080 bytes, so SD_MAX_NODES is
983080 / (592 * 2) = 830 (rounded down).

Signed-off-by: Robin Dong <sanbai at taobao.com>
---
 dog/cluster.c            |  4 +++
 include/internal_proto.h | 18 ++++++++++++--
 include/sheep.h          |  5 ++++
 sheep/group.c            |  9 ++++++-
 sheep/md.c               | 64 +++++++++++++++++++++++++++++++-----------------
 sheep/sheep_priv.h       | 24 ++++++++++++++++++
 6 files changed, 98 insertions(+), 26 deletions(-)

diff --git a/dog/cluster.c b/dog/cluster.c
index 4af1e7c..7c5bc87 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -123,6 +123,10 @@ static int cluster_format(int argc, char **argv)
 	if (cluster_cmd_data.strict)
 		hdr.cluster.flags |= SD_CLUSTER_FLAG_STRICT;
 
+#ifdef HAVE_DISKVNODES
+	hdr.cluster.flags |= SD_CLUSTER_FLAG_DISKMODE;
+#endif
+
 	printf("using backend %s store\n", store_name);
 	ret = dog_exec_req(&sd_nid, &hdr, store_name);
 	if (ret < 0)
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 628bde7..488d6d9 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -42,7 +42,6 @@
  * Currently, only zookeeper driver support SD_MAX_NODES nodes because
  * its message buffer size is large enough to hold nodes[SD_MAX_NODES].
  */
-#define SD_MAX_NODES 6144
 #define SD_DEFAULT_VNODES 128
 
 /*
@@ -149,7 +148,13 @@ struct node_id {
 	uint8_t pad[4];
 };
 
-#define SD_NODE_SIZE 80
+struct disk_info {
+	uint64_t disk_id;
+	uint64_t disk_space;
+};
+
+#define DISK_MAX     32
+#define WEIGHT_MIN   (1ULL << 32)       /* 4G */
 
 struct sd_node {
 	struct rb_node  rb;
@@ -157,6 +162,15 @@ struct sd_node {
 	uint16_t	nr_vnodes;
 	uint32_t	zone;
 	uint64_t        space;
+#ifdef HAVE_DISKVNODES
+	#define SD_MAX_NODES 830
+	#define SD_NODE_SIZE (80 + sizeof(struct disk_info) * DISK_MAX)
+	struct disk_info disks[DISK_MAX];
+#else
+	#define SD_MAX_NODES 6144
+	#define SD_NODE_SIZE 80
+	struct disk_info disks[0];
+#endif
 };
 
 /*
diff --git a/include/sheep.h b/include/sheep.h
index f7f5c48..ea376cc 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -276,4 +276,9 @@ static inline struct sd_node *str_to_node(const char *str, struct sd_node *id)
 	return id;
 }
 
+static inline bool is_cluster_diskmode(const struct cluster_info *cinfo)
+{
+	return (cinfo->flags & SD_CLUSTER_FLAG_DISKMODE) > 0;
+}
+
 #endif
diff --git a/sheep/group.c b/sheep/group.c
index 4114dfb..1e861bd 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -965,7 +965,12 @@ static void update_node_size(struct sd_node *node)
 	if (unlikely(!n))
 		panic("can't find %s", node_to_str(node));
 	n->space = node->space;
-
+	if (is_cluster_diskmode(&sys->cinfo)) {
+		memset(n->disks, 0, sizeof(struct disk_info) * DISK_MAX);
+		for (int i = 0; i < DISK_MAX; i++)
+			if (node->disks[i].disk_id)
+				n->disks[i] = node->disks[i];
+	}
 	put_vnode_info(cur_vinfo);
 }
 
@@ -1026,6 +1031,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
 
 	sys->this_node.space = sys->disk_space;
 
+	update_node_disks();
+
 	sys->cinfo.epoch = get_latest_epoch();
 	if (sys->cinfo.epoch) {
 		sys->cinfo.nr_nodes = epoch_log_read(sys->cinfo.epoch,
diff --git a/sheep/md.c b/sheep/md.c
index 8154ee6..54d2f8c 100644
--- a/sheep/md.c
+++ b/sheep/md.c
@@ -17,27 +17,7 @@
 
 #define NONE_EXIST_PATH "/all/disks/are/broken/,ps/əʌo7/!"
 
-struct disk {
-	struct rb_node rb;
-	char path[PATH_MAX];
-	uint64_t space;
-};
-
-struct vdisk {
-	struct rb_node rb;
-	struct disk *disk;
-	uint64_t hash;
-};
-
-struct md {
-	struct rb_root vroot;
-	struct rb_root root;
-	struct sd_rw_lock lock;
-	uint64_t space;
-	uint32_t nr_disks;
-};
-
-static struct md md = {
+struct md md = {
 	.vroot = RB_ROOT,
 	.root = RB_ROOT,
 	.lock = SD_RW_LOCK_INITIALIZER,
@@ -87,7 +67,7 @@ static struct vdisk *oid_to_vdisk(uint64_t oid)
 	return hval_to_vdisk(sd_hash_oid(oid));
 }
 
-static void create_vdisks(struct disk *disk)
+static void create_vdisks(const struct disk *disk)
 {
 	uint64_t hval = sd_hash(disk->path, strlen(disk->path));
 	int nr = vdisk_number(disk);
@@ -751,6 +731,42 @@ static inline void md_del_disk(const char *path)
 	md_remove_disk(disk);
 }
 
+#ifdef HAVE_DISKVNODES
+void update_node_disks(void)
+{
+	const struct disk *disk;
+	int i = 0;
+	bool rb_empty = false;
+
+	if (!sys)
+		return;
+
+	memset(sys->this_node.disks, 0, sizeof(struct disk_info) * DISK_MAX);
+	sd_read_lock(&md.lock);
+	rb_for_each_entry(disk, &md.root, rb) {
+		sys->this_node.disks[i].disk_id =
+			sd_hash(disk->path, strlen(disk->path));
+		sys->this_node.disks[i].disk_space = disk->space;
+		i++;
+	}
+	sd_rw_unlock(&md.lock);
+
+	if (RB_EMPTY_ROOT(&md.vroot))
+		rb_empty = true;
+	sd_write_lock(&md.lock);
+	rb_for_each_entry(disk, &md.root, rb) {
+		if (!rb_empty)
+			remove_vdisks(disk);
+		create_vdisks(disk);
+	}
+	sd_rw_unlock(&md.lock);
+}
+#else
+void update_node_disks(void)
+{
+}
+#endif
+
 static int do_plug_unplug(char *disks, bool plug)
 {
 	const char *path;
@@ -776,8 +792,10 @@ static int do_plug_unplug(char *disks, bool plug)
 out:
 	sd_rw_unlock(&md.lock);
 
-	if (ret == SD_RES_SUCCESS)
+	if (ret == SD_RES_SUCCESS) {
+		update_node_disks();
 		kick_recover();
+	}
 
 	return ret;
 }
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index fddd641..693171c 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -164,6 +164,30 @@ struct system_info {
 	struct sd_stat stat;
 };
 
+struct disk {
+	struct rb_node rb;
+	char path[PATH_MAX];
+	uint64_t space;
+};
+
+struct vdisk {
+	struct rb_node rb;
+	const struct disk *disk;
+	uint64_t hash;
+};
+
+struct md {
+	struct rb_root vroot;
+	struct rb_root root;
+	struct sd_rw_lock lock;
+	uint64_t space;
+	uint32_t nr_disks;
+};
+
+extern struct md md;
+
+void update_node_disks(void);
+
 struct siocb {
 	uint32_t epoch;
 	void *buf;
-- 
1.7.12.4




More information about the sheepdog mailing list