[Sheepdog] [PATCH] sheep: use corosync nodeid as a default zone id value

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Tue Aug 9 15:55:47 CEST 2011


Usually, we don't want to replicate data on the same machine.  This
patch makes sheep daemons on the same node share the same zone id by
using the corosync nodeid as the default zone id.

If you want to assign a zone id manually (e.g. to enable rack-aware data
placement or to emulate multiple nodes on one physical machine), specify
it explicitly with the '-z' option on the sheep command line.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/collie.c    |    8 ++++----
 include/sheep.h    |    6 ++----
 sheep/group.c      |   17 +++++++++--------
 sheep/sheep.c      |   18 +++++++++++++-----
 sheep/sheep_priv.h |    2 +-
 sheep/store.c      |   11 +++--------
 6 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/collie/collie.c b/collie/collie.c
index 0b307a5..c5eae56 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -708,8 +708,8 @@ static int node_list(int argc, char **argv)
 	int i;
 
 	if (!raw_output) {
-		printf("   Idx - Host:Port          Vnodes   Zone\n");
-		printf("-----------------------------------------\n");
+		printf("   Idx - Host:Port          Vnodes       Zone\n");
+		printf("---------------------------------------------\n");
 	}
 	for (i = 0; i < nr_nodes; i++) {
 		char data[128];
@@ -720,13 +720,13 @@ static int node_list(int argc, char **argv)
 		if (i == master_idx) {
 			if (highlight)
 				printf(TEXT_BOLD);
-			printf(raw_output ? "* %d %s %d %d\n" : "* %4d - %-20s\t%d\t%d\n",
+			printf(raw_output ? "* %d %s %d %d\n" : "* %4d - %-20s\t%d%11d\n",
 			       i, data, node_list_entries[i].nr_vnodes,
 			       node_list_entries[i].zone);
 			if (highlight)
 				printf(TEXT_NORMAL);
 		} else
-			printf(raw_output ? "- %d %s %d %d\n" : "  %4d - %-20s\t%d\t%d\n",
+			printf(raw_output ? "- %d %s %d %d\n" : "  %4d - %-20s\t%d%11d\n",
 			       i, data, node_list_entries[i].nr_vnodes,
 			       node_list_entries[i].zone);
 	}
diff --git a/include/sheep.h b/include/sheep.h
index ea78c1b..31516d9 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -128,8 +128,7 @@ struct sheepdog_node_list_entry {
 	uint8_t         addr[16];
 	uint16_t        port;
 	uint16_t	nr_vnodes;
-	uint16_t	zone;
-	uint16_t	pad;
+	uint32_t	zone;
 };
 
 struct sheepdog_vnode_list_entry {
@@ -137,8 +136,7 @@ struct sheepdog_vnode_list_entry {
 	uint8_t         addr[16];
 	uint16_t        port;
 	uint16_t	node_idx;
-	uint16_t	zone;
-	uint16_t	pad;
+	uint32_t	zone;
 };
 
 struct epoch_log {
diff --git a/sheep/group.c b/sheep/group.c
index 02ef40b..f865e8f 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -173,8 +173,8 @@ static void build_node_list(struct list_head *node_list,
 			    int *nr_nodes, int *nr_zones)
 {
 	struct node *node;
-	int nr = 0, nr_zero_zones = 0, i;
-	uint16_t zones[SD_MAX_REDUNDANCY];
+	int nr = 0, i;
+	uint32_t zones[SD_MAX_REDUNDANCY];
 
 	if (nr_zones)
 		*nr_zones = 0;
@@ -192,17 +192,13 @@ static void build_node_list(struct list_head *node_list,
 				}
 				if (i == *nr_zones)
 					zones[(*nr_zones)++] = node->ent.zone;
-			} else
-				nr_zero_zones++;
+			}
 		}
 	}
 	if (entries)
 		qsort(entries, nr, sizeof(*entries), node_cmp);
 	if (nr_nodes)
 		*nr_nodes = nr;
-	if (nr_zones)
-		/* Zero zone nodes behave as if they have different zones */
-		*nr_zones += nr_zero_zones;
 }
 
 int get_ordered_sd_node_list(struct sheepdog_node_list_entry *entries)
@@ -1696,7 +1692,7 @@ static void set_addr(unsigned int nodeid, int port)
 	vprintf(SDOG_INFO "addr = %s, port = %d\n", tmp, port);
 }
 
-int create_cluster(int port)
+int create_cluster(int port, int64_t zone)
 {
 	int fd, ret;
 	cpg_handle_t cpg_handle;
@@ -1742,6 +1738,11 @@ join_retry:
 	set_addr(nodeid, port);
 	sys->this_node.port = port;
 	sys->this_node.nr_vnodes = SD_DEFAULT_VNODES;
+	if (zone == -1)
+		sys->this_node.zone = nodeid;
+	else
+		sys->this_node.zone = zone;
+	dprintf("zone id = %u\n", sys->this_node.zone);
 
 	if (get_latest_epoch() == 0)
 		sys->status = SD_STATUS_WAIT_FOR_FORMAT;
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 9ed8b90..695aa95 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -73,6 +73,8 @@ int main(int argc, char **argv)
 	int is_daemon = 1;
 	int log_level = LOG_INFO;
 	char path[PATH_MAX];
+	int64_t zone = -1;
+	char *p;
 
 	signal(SIGPIPE, SIG_IGN);
 
@@ -97,12 +99,18 @@ int main(int argc, char **argv)
 			sys->use_directio = 1;
 			break;
 		case 'z':
-			sys->this_node.zone = atoi(optarg);
-			if (sys->this_node.zone == 0) {
-				eprintf("zone id must be between 1 and 65535\n");
+			zone = strtol(optarg, &p, 10);
+			if (optarg == p) {
+				eprintf("%s is not an integer\n", optarg);
 				exit(1);
 			}
-			dprintf("zone id = %d\n", sys->this_node.zone);
+
+			if (zone < 0 || UINT32_MAX < zone) {
+				eprintf("zone id must be between 0 and %u\n",
+					UINT32_MAX);
+				exit(1);
+			}
+			sys->this_node.zone = zone;
 			break;
 		case 'h':
 			usage(0);
@@ -145,7 +153,7 @@ int main(int argc, char **argv)
 	if (ret)
 		exit(1);
 
-	ret = create_cluster(port);
+	ret = create_cluster(port, zone);
 	if (ret) {
 		eprintf("failed to create sheepdog cluster.\n");
 		exit(1);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ae5bb03..f8f8c65 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -174,7 +174,7 @@ int is_access_local(struct sheepdog_vnode_list_entry *e, int nr_nodes,
 
 void resume_pending_requests(void);
 
-int create_cluster(int port);
+int create_cluster(int port, int64_t zone);
 int leave_cluster(void);
 
 void start_cpg_event_work(void);
diff --git a/sheep/store.c b/sheep/store.c
index 947c7e2..79a1af3 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1042,18 +1042,13 @@ uint64_t get_cluster_ctime(void)
 static int get_max_copies(struct sheepdog_node_list_entry *entries, int nr)
 {
 	int i, j;
-	unsigned int nr_zones = 0, nr_zero_zones = 0;
-	uint16_t zones[SD_MAX_REDUNDANCY];
+	unsigned int nr_zones = 0;
+	uint32_t zones[SD_MAX_REDUNDANCY];
 
 	for (i = 0; i < nr; i++) {
 		if (nr_zones >= ARRAY_SIZE(zones))
 			break;
 
-		if (entries[i].zone == 0) {
-			nr_zero_zones++;
-			continue;
-		}
-
 		for (j = 0; j < nr_zones; j++) {
 			if (zones[j] == entries[i].zone)
 				break;
@@ -1062,7 +1057,7 @@ static int get_max_copies(struct sheepdog_node_list_entry *entries, int nr)
 			zones[nr_zones++] = entries[i].zone;
 	}
 
-	return min(sys->nr_sobjs, nr_zones + nr_zero_zones);
+	return min(sys->nr_sobjs, nr_zones);
 }
 
 /*
-- 
1.7.2.5




More information about the sheepdog mailing list