Usually, we don't want to replicate data on the same machine. This patch makes sheep daemons on the same node use the same zone id. If you want to assign a zone id manually (e.g. to enable rack-aware data placement, or to emulate multiple nodes on one physical machine), specify it explicitly on the sheep command line with the '-z' option. Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp> --- collie/collie.c | 8 ++++---- include/sheep.h | 6 ++---- sheep/group.c | 17 +++++++++-------- sheep/sheep.c | 18 +++++++++++++----- sheep/sheep_priv.h | 2 +- sheep/store.c | 11 +++-------- 6 files changed, 32 insertions(+), 30 deletions(-) diff --git a/collie/collie.c b/collie/collie.c index 0b307a5..c5eae56 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -708,8 +708,8 @@ static int node_list(int argc, char **argv) int i; if (!raw_output) { - printf(" Idx - Host:Port Vnodes Zone\n"); - printf("-----------------------------------------\n"); + printf(" Idx - Host:Port Vnodes Zone\n"); + printf("---------------------------------------------\n"); } for (i = 0; i < nr_nodes; i++) { char data[128]; @@ -720,13 +720,13 @@ static int node_list(int argc, char **argv) if (i == master_idx) { if (highlight) printf(TEXT_BOLD); - printf(raw_output ? "* %d %s %d %d\n" : "* %4d - %-20s\t%d\t%d\n", + printf(raw_output ? "* %d %s %d %d\n" : "* %4d - %-20s\t%d%11d\n", i, data, node_list_entries[i].nr_vnodes, node_list_entries[i].zone); if (highlight) printf(TEXT_NORMAL); } else - printf(raw_output ? "- %d %s %d %d\n" : " %4d - %-20s\t%d\t%d\n", + printf(raw_output ? 
"- %d %s %d %d\n" : " %4d - %-20s\t%d%11d\n", i, data, node_list_entries[i].nr_vnodes, node_list_entries[i].zone); } diff --git a/include/sheep.h b/include/sheep.h index ea78c1b..31516d9 100644 --- a/include/sheep.h +++ b/include/sheep.h @@ -128,8 +128,7 @@ struct sheepdog_node_list_entry { uint8_t addr[16]; uint16_t port; uint16_t nr_vnodes; - uint16_t zone; - uint16_t pad; + uint32_t zone; }; struct sheepdog_vnode_list_entry { @@ -137,8 +136,7 @@ struct sheepdog_vnode_list_entry { uint8_t addr[16]; uint16_t port; uint16_t node_idx; - uint16_t zone; - uint16_t pad; + uint32_t zone; }; struct epoch_log { diff --git a/sheep/group.c b/sheep/group.c index 02ef40b..f865e8f 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -173,8 +173,8 @@ static void build_node_list(struct list_head *node_list, int *nr_nodes, int *nr_zones) { struct node *node; - int nr = 0, nr_zero_zones = 0, i; - uint16_t zones[SD_MAX_REDUNDANCY]; + int nr = 0, i; + uint32_t zones[SD_MAX_REDUNDANCY]; if (nr_zones) *nr_zones = 0; @@ -192,17 +192,13 @@ static void build_node_list(struct list_head *node_list, } if (i == *nr_zones) zones[(*nr_zones)++] = node->ent.zone; - } else - nr_zero_zones++; + } } } if (entries) qsort(entries, nr, sizeof(*entries), node_cmp); if (nr_nodes) *nr_nodes = nr; - if (nr_zones) - /* Zero zone nodes behave as if they have different zones */ - *nr_zones += nr_zero_zones; } int get_ordered_sd_node_list(struct sheepdog_node_list_entry *entries) @@ -1696,7 +1692,7 @@ static void set_addr(unsigned int nodeid, int port) vprintf(SDOG_INFO "addr = %s, port = %d\n", tmp, port); } -int create_cluster(int port) +int create_cluster(int port, int64_t zone) { int fd, ret; cpg_handle_t cpg_handle; @@ -1742,6 +1738,11 @@ join_retry: set_addr(nodeid, port); sys->this_node.port = port; sys->this_node.nr_vnodes = SD_DEFAULT_VNODES; + if (zone == -1) + sys->this_node.zone = nodeid; + else + sys->this_node.zone = zone; + dprintf("zone id = %u\n", sys->this_node.zone); if (get_latest_epoch() == 
0) sys->status = SD_STATUS_WAIT_FOR_FORMAT; diff --git a/sheep/sheep.c b/sheep/sheep.c index 9ed8b90..695aa95 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -73,6 +73,8 @@ int main(int argc, char **argv) int is_daemon = 1; int log_level = LOG_INFO; char path[PATH_MAX]; + int64_t zone = -1; + char *p; signal(SIGPIPE, SIG_IGN); @@ -97,12 +99,18 @@ int main(int argc, char **argv) sys->use_directio = 1; break; case 'z': - sys->this_node.zone = atoi(optarg); - if (sys->this_node.zone == 0) { - eprintf("zone id must be between 1 and 65535\n"); + zone = strtol(optarg, &p, 10); + if (optarg == p) { + eprintf("%s is not an integer\n", optarg); exit(1); } - dprintf("zone id = %d\n", sys->this_node.zone); + + if (zone < 0 || UINT32_MAX < zone) { + eprintf("zone id must be between 0 and %u\n", + UINT32_MAX); + exit(1); + } + sys->this_node.zone = zone; break; case 'h': usage(0); @@ -145,7 +153,7 @@ int main(int argc, char **argv) if (ret) exit(1); - ret = create_cluster(port); + ret = create_cluster(port, zone); if (ret) { eprintf("failed to create sheepdog cluster.\n"); exit(1); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index ae5bb03..f8f8c65 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -174,7 +174,7 @@ int is_access_local(struct sheepdog_vnode_list_entry *e, int nr_nodes, void resume_pending_requests(void); -int create_cluster(int port); +int create_cluster(int port, int64_t zone); int leave_cluster(void); void start_cpg_event_work(void); diff --git a/sheep/store.c b/sheep/store.c index 947c7e2..79a1af3 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -1042,18 +1042,13 @@ uint64_t get_cluster_ctime(void) static int get_max_copies(struct sheepdog_node_list_entry *entries, int nr) { int i, j; - unsigned int nr_zones = 0, nr_zero_zones = 0; - uint16_t zones[SD_MAX_REDUNDANCY]; + unsigned int nr_zones = 0; + uint32_t zones[SD_MAX_REDUNDANCY]; for (i = 0; i < nr; i++) { if (nr_zones >= ARRAY_SIZE(zones)) break; - if (entries[i].zone == 0) { - 
nr_zero_zones++; - continue; - } - for (j = 0; j < nr_zones; j++) { if (zones[j] == entries[i].zone) break; @@ -1062,7 +1057,7 @@ static int get_max_copies(struct sheepdog_node_list_entry *entries, int nr) zones[nr_zones++] = entries[i].zone; } - return min(sys->nr_sobjs, nr_zones + nr_zero_zones); + return min(sys->nr_sobjs, nr_zones); } /* -- 1.7.2.5 |