[sheepdog] [PATCH v2 3/4] sheep: recalculate the vnodes number when epoch changes

levin li levin108 at gmail.com
Fri Aug 3 14:50:10 CEST 2012


From: levin li <xingke.lwp at taobao.com>

When the epoch changes because a new node joins or an old node leaves,
we should recalculate the vnode_info for every sd_node. The disk space
is stored in sd_node and transferred to every other node together with
the join message.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 include/internal_proto.h |    3 ++-
 sheep/cluster.h          |    1 +
 sheep/group.c            |   24 +++++++++++++++++++++++-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index 4f1b0a0..0394e05 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -19,7 +19,7 @@
 
 #include <stdint.h>
 
-#define SD_SHEEP_PROTO_VER 0x05
+#define SD_SHEEP_PROTO_VER 0x06
 
 #define SD_DEFAULT_REDUNDANCY 3
 #define SD_MAX_REDUNDANCY 8
@@ -177,6 +177,7 @@ struct sd_node {
 	struct node_id  nid;
 	uint16_t	nr_vnodes;
 	uint32_t	zone;
+	uint32_t        space;
 };
 
 struct epoch_log {
diff --git a/sheep/cluster.h b/sheep/cluster.h
index 153e33f..75596a8 100644
--- a/sheep/cluster.h
+++ b/sheep/cluster.h
@@ -198,5 +198,6 @@ void sd_notify_handler(struct sd_node *sender, void *msg, size_t msg_len);
 bool sd_block_handler(struct sd_node *sender);
 enum cluster_join_result sd_check_join_cb(struct sd_node *joining,
 		void *opaque);
+void recalculate_vnodes(struct sd_node *nodes, int nr_nodes);
 
 #endif
diff --git a/sheep/group.c b/sheep/group.c
index cb86050..03044cc 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -211,7 +211,9 @@ struct vnode_info *alloc_vnode_info(struct sd_node *nodes,
 	memcpy(vnode_info->nodes, nodes, sizeof(*nodes) * nr_nodes);
 	qsort(vnode_info->nodes, nr_nodes, sizeof(*nodes), node_id_cmp);
 
-	vnode_info->nr_vnodes = nodes_to_vnodes(nodes, nr_nodes,
+	recalculate_vnodes(vnode_info->nodes, nr_nodes);
+
+	vnode_info->nr_vnodes = nodes_to_vnodes(vnode_info->nodes, nr_nodes,
 						vnode_info->vnodes);
 	vnode_info->nr_zones = get_zones_nr_from(nodes, nr_nodes);
 	uatomic_set(&vnode_info->refcnt, 1);
@@ -806,6 +808,45 @@ static void prepare_recovery(struct sd_node *joined,
 		current_vnode_info = alloc_vnode_info(nodes, nr_nodes);
 }
 
+/*
+ * Assign each node a vnode count proportional to its share of disk space.
+ *
+ * A node whose 'space' equals the average over 'nodes' gets
+ * SD_DEFAULT_VNODES; other nodes are scaled linearly, capped at
+ * UINT16_MAX because nr_vnodes is 16 bits wide.  The counts are written
+ * back into nodes[i].nr_vnodes.
+ *
+ * If the list is empty or the average space is zero there is nothing to
+ * scale against, so the existing nr_vnodes values are left untouched
+ * instead of dividing by zero.
+ */
+void recalculate_vnodes(struct sd_node *nodes, int nr_nodes)
+{
+	int i;
+	uint64_t avg_size = 0;
+	uint64_t nr;
+
+	if (nr_nodes <= 0)
+		return;
+
+	for (i = 0; i < nr_nodes; i++)
+		avg_size += nodes[i].space;
+	avg_size /= nr_nodes;
+
+	if (avg_size == 0)
+		return;
+
+	for (i = 0; i < nr_nodes; i++) {
+		/* integer math: exact, no floating-point rounding */
+		nr = (uint64_t)SD_DEFAULT_VNODES * nodes[i].space / avg_size;
+		if (nr > UINT16_MAX)
+			nr = UINT16_MAX;
+		nodes[i].nr_vnodes = nr;
+		dprintf("node %d has %d vnodes, free space %" PRIu32 "\n",
+			nodes[i].nid.port, nodes[i].nr_vnodes, nodes[i].space);
+	}
+}
+
 static void update_cluster_info(struct join_message *msg,
 				struct sd_node *joined, struct sd_node *nodes,
 				size_t nr_nodes)
@@ -1196,6 +1216,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
 		sys->this_node.zone = zone;
 	dprintf("zone id = %u\n", sys->this_node.zone);
 
+	sys->this_node.space = sys->disk_space;
+
 	if (get_latest_epoch() > 0) {
 		sys->status = SD_STATUS_WAIT_FOR_JOIN;
 
-- 
1.7.1




More information about the sheepdog mailing list