[sheepdog] [PATCH 7/7] test/unit: add test to check hash function

MORITA Kazutaka morita.kazutaka at gmail.com
Fri Aug 30 11:32:09 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

This unit test checks:

 - that the existing virtual node ids remain unchanged after node join/leave events.
 - that the existing virtual disk ids remain unchanged after disk plug/unplug events.
 - the dispersion (uniformity) of sd_hash() with a chi-squared test.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 tests/unit/sheep/Makefile.am  |    5 +-
 tests/unit/sheep/mock_group.c |    6 +
 tests/unit/sheep/test_hash.c  |  537 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 546 insertions(+), 2 deletions(-)
 create mode 100644 tests/unit/sheep/test_hash.c

diff --git a/tests/unit/sheep/Makefile.am b/tests/unit/sheep/Makefile.am
index e27899a..92a0cb9 100644
--- a/tests/unit/sheep/Makefile.am
+++ b/tests/unit/sheep/Makefile.am
@@ -1,6 +1,6 @@
 MAINTAINERCLEANFILES	= Makefile.in
 
-TESTS			= test_vdi test_cluster_driver
+TESTS			= test_vdi test_cluster_driver test_hash
 
 check_PROGRAMS		= ${TESTS}
 
@@ -10,7 +10,7 @@ INCLUDES		= -I$(top_srcdir)/include			\
 			  @CHECK_CFLAGS@
 
 LIBS			= $(top_srcdir)/lib/libsheepdog.a		\
-			  ../mock/libmock.a -lpthread			\
+			  ../mock/libmock.a -lpthread -lm		\
 			  @CHECK_LIBS@
 
 test_vdi_SOURCES	= test_vdi.c mock_sheep.c mock_store.c		\
@@ -27,6 +27,7 @@ test_cluster_driver_CFLAGS	+= -DBUILD_ZOOKEEPER
 LIBS += -lzookeeper_mt
 endif
 
+test_hash_SOURCES	= test_hash.c mock_sheep.c mock_group.c
 
 clean-local:
 	rm -f ${check_PROGRAMS} *.o
diff --git a/tests/unit/sheep/mock_group.c b/tests/unit/sheep/mock_group.c
index 27b9655..52ec307 100644
--- a/tests/unit/sheep/mock_group.c
+++ b/tests/unit/sheep/mock_group.c
@@ -13,6 +13,7 @@
 
 #include "mock.h"
 
+#include "sheep_priv.h"
 #include "cluster.h"
 
 MOCK_VOID_METHOD(sd_accept_handler, const struct sd_node *joined,
@@ -28,3 +29,8 @@ MOCK_VOID_METHOD(sd_notify_handler, const struct sd_node *sender, void *msg,
 MOCK_METHOD(sd_block_handler, bool, true, const struct sd_node *sender)
 MOCK_METHOD(sd_reconnect_handler, int, 0)
 MOCK_VOID_METHOD(sd_update_node_handler, struct sd_node *node)
+
+MOCK_METHOD(get_vnode_info, struct vnode_info *, NULL)
+MOCK_METHOD(start_recovery, int, 0, struct vnode_info *cur_vinfo,
+	    struct vnode_info *old_vinfo, bool epoch_lifted)
+MOCK_VOID_METHOD(put_vnode_info, struct vnode_info *vnode_info)
diff --git a/tests/unit/sheep/test_hash.c b/tests/unit/sheep/test_hash.c
new file mode 100644
index 0000000..0c7320f
--- /dev/null
+++ b/tests/unit/sheep/test_hash.c
@@ -0,0 +1,537 @@
+#include <check.h>
+#include <math.h>
+
+#include "md.c"
+
+/* Constant values for the chi-squared test */
+#define DATA_SIZE 1024
+#define DF 9 /* a degree of freedom */
+#define CV 16.919 /* a critical value (p=0.05, df=9) */
+#define EXP ((double)DATA_SIZE / (DF + 1)) /* an expected value */
+
+/* uniform distribution */
+const double uniform_dist[DF + 1] = {
+	0,
+	(double)UINT64_MAX / 10,
+	(double)UINT64_MAX / 10 * 2,
+	(double)UINT64_MAX / 10 * 3,
+	(double)UINT64_MAX / 10 * 4,
+	(double)UINT64_MAX / 10 * 5,
+	(double)UINT64_MAX / 10 * 6,
+	(double)UINT64_MAX / 10 * 7,
+	(double)UINT64_MAX / 10 * 8,
+	(double)UINT64_MAX / 10 * 9,
+};
+
+/* chi-squared distribution with 9 degrees of freedom */
+const double chi2_dist[DF + 1] = {
+	0.000,	4.168,	5.380,	6.393,	7.357,
+	8.343,	9.414,	10.656,	12.242,	14.684,
+};
+
+
+/* return true if s2 is subset of s1 */
+#define is_subset(s1, nr_s1, s2, nr_s2, cmp)			\
+({								\
+	bool ____ret = true;					\
+	for (int __i = 0; __i < nr_s2; __i++) {			\
+		if (!xbsearch((s2) + __i, s1, nr_s1, cmp)) {	\
+			____ret = false;			\
+			break;					\
+		}						\
+	}							\
+	____ret;						\
+})
+
+/* calculate a chi-squared value */
+static double get_chi2(const double *data, const double *dist)
+{
+	double chi2 = 0.0;
+	int counts[DF + 1] = {};
+
+	for (int i = 0; i < DATA_SIZE; i++) {
+		for (int j = DF; j >= 0; j--) {
+			if (dist[j] <= data[i]) {
+				counts[j]++;
+				break;
+			}
+		}
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(counts); i++)
+		chi2 += pow((double)counts[i] - EXP, 2) / EXP;
+
+	return chi2;
+}
+
+/* do a chi-squared test */
+static void chi2_test(void (*gen_data)(double *data, int idx))
+{
+	double sample_data[DATA_SIZE], chi2_data[DATA_SIZE], chi2;
+
+	for (int i = 0; i < ARRAY_SIZE(chi2_data); i++) {
+		gen_data(sample_data, i);
+		chi2_data[i] = get_chi2(sample_data, uniform_dist);
+	}
+
+	chi2 = get_chi2(chi2_data, chi2_dist);
+
+	ck_assert_msg(chi2 < CV, "chi-square test failed (chi-square: %lf,"
+		      " critical value: %lf)", chi2, CV);
+}
+
+static void (*gen_basic_data)(double *data, int idx);
+
+/* generate sample data with sd_hash() */
+static void gen_sd_hash_data(double *data, int idx)
+{
+	static int n;
+
+	for (int i = 0; i < DATA_SIZE; i++) {
+		data[i] = sd_hash(&n, sizeof(n));
+		n++;
+	}
+}
+
+static void basic1_setup(void)
+{
+	gen_basic_data = gen_sd_hash_data;
+}
+
+/* generate sample data with sd_hash_next() */
+static void gen_sd_hash_next_data(double *data, int idx)
+{
+	uint64_t hval = sd_hash(&idx, sizeof(idx));
+
+	for (int i = 0; i < DATA_SIZE; i++) {
+		hval = sd_hash_next(hval);
+		data[i] = hval;
+	}
+}
+
+static void basic2_setup(void)
+{
+	gen_basic_data = gen_sd_hash_next_data;
+}
+
+START_TEST(test_basic_dispersion)
+{
+	chi2_test(gen_basic_data);
+}
+END_TEST
+
+static size_t (*gen_nodes)(struct sd_node *nodes, int idx);
+
+/* generate one disk which has many virtual disks */
+static size_t gen_many_vnodes(struct sd_node *nodes, int idx)
+{
+	memset(nodes, 0, sizeof(*nodes));
+
+	/* IPv4 10.0.0.1 */
+	nodes[0].nid.addr[12] = 10;
+	nodes[0].nid.addr[15] = 1;
+	nodes[0].nid.port = 7000 + idx;
+
+	nodes[0].nr_vnodes = DATA_SIZE;
+
+	return 1;
+}
+
+static void node1_setup(void)
+{
+	gen_nodes = gen_many_vnodes;
+}
+
+/* generate many daemons with one vnode on the same node */
+static size_t gen_many_daemons_one_vnode(struct sd_node *nodes, int idx)
+{
+	memset(nodes, 0, sizeof(*nodes) * DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE; i++) {
+		/* IPv4 10.0.x.y */
+		nodes[i].nid.addr[12] = 10;
+		nodes[i].nid.addr[14] = idx / 256;
+		nodes[i].nid.addr[15] = idx % 256;
+		nodes[i].nid.port = 7000 + i;
+		nodes[i].nr_vnodes = 1;
+	}
+
+	return DATA_SIZE;
+}
+
+static void node2_setup(void)
+{
+	gen_nodes = gen_many_daemons_one_vnode;
+}
+
+/* generate many daemons with some vnodes on the same node */
+static size_t gen_many_daemons_some_vnodes(struct sd_node *nodes, int idx)
+{
+	memset(nodes, 0, sizeof(*nodes) * DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE / 4; i++) {
+		/* IPv4 10.0.x.y */
+		nodes[i].nid.addr[12] = 10;
+		nodes[i].nid.addr[14] = idx / 256;
+		nodes[i].nid.addr[15] = idx % 256;
+		nodes[i].nid.port = 7000 + i;
+		nodes[i].nr_vnodes = 4;
+	}
+
+	return DATA_SIZE / 4;
+}
+
+static void node3_setup(void)
+{
+	gen_nodes = gen_many_daemons_some_vnodes;
+}
+
+/* generate many nodes who have only one virtual node */
+static size_t gen_many_nodes_one_vnode(struct sd_node *nodes, int idx)
+{
+	memset(nodes, 0, sizeof(*nodes) * DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE; i++) {
+		/* IPv4 10.0.x.y */
+		nodes[i].nid.addr[12] = 10;
+		nodes[i].nid.addr[14] = i / 256;
+		nodes[i].nid.addr[15] = i % 256;
+		nodes[i].nid.port = 7000 + idx;
+		nodes[i].nr_vnodes = 1;
+	}
+
+	return DATA_SIZE;
+}
+
+static void node4_setup(void)
+{
+	gen_nodes = gen_many_nodes_one_vnode;
+}
+
+/* generate many nodes who have some virtual nodes */
+static size_t gen_many_nodes_some_vnodes(struct sd_node *nodes, int idx)
+{
+	memset(nodes, 0, sizeof(*nodes) * DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE / 4; i++) {
+		/* IPv4 10.0.x.y */
+		nodes[i].nid.addr[12] = 10;
+		nodes[i].nid.addr[14] = 0;
+		nodes[i].nid.addr[15] = i;
+		nodes[i].nid.port = 7000 + idx;
+		nodes[i].nr_vnodes = 4;
+	}
+
+	return DATA_SIZE / 4;
+}
+
+static void node5_setup(void)
+{
+	gen_nodes = gen_many_nodes_some_vnodes;
+}
+
+/* check the existing vnodes don't change */
+START_TEST(test_nodes_update)
+{
+	size_t nr_vnodes;
+	size_t nr_vnodes_after;
+	struct sd_node nodes[DATA_SIZE];
+	struct sd_vnode vnodes[SD_MAX_VNODES];
+	struct sd_vnode vnodes_after[SD_MAX_VNODES];
+
+	gen_nodes(nodes, 0);
+
+	nr_vnodes = nodes_to_vnodes(nodes, 1, vnodes);
+	/* 1 node join */
+	nr_vnodes_after = nodes_to_vnodes(nodes, 2, vnodes_after);
+	ck_assert(is_subset(vnodes_after, nr_vnodes_after, vnodes,
+			    nr_vnodes, vnode_cmp));
+
+	nr_vnodes = nodes_to_vnodes(nodes, 100, vnodes);
+	/* 1 node join */
+	nr_vnodes_after = nodes_to_vnodes(nodes, 101, vnodes_after);
+	ck_assert(is_subset(vnodes_after, nr_vnodes_after, vnodes,
+			    nr_vnodes, vnode_cmp));
+	/* 100 nodes join */
+	nr_vnodes_after = nodes_to_vnodes(nodes, 200, vnodes_after);
+	ck_assert(is_subset(vnodes_after, nr_vnodes_after, vnodes,
+			    nr_vnodes, vnode_cmp));
+
+	nr_vnodes = nodes_to_vnodes(nodes, 2, vnodes);
+	/* 1 node leave */
+	nr_vnodes_after = nodes_to_vnodes(nodes + 1, 1, vnodes_after);
+	ck_assert(is_subset(vnodes, nr_vnodes, vnodes_after,
+			    nr_vnodes_after, vnode_cmp));
+
+	nr_vnodes = nodes_to_vnodes(nodes, 200, vnodes);
+	/* 1 node leave */
+	nr_vnodes_after = nodes_to_vnodes(nodes + 1, 199, vnodes_after);
+	ck_assert(is_subset(vnodes, nr_vnodes, vnodes_after,
+			    nr_vnodes_after, vnode_cmp));
+	/* 100 nodes leave */
+	nr_vnodes_after = nodes_to_vnodes(nodes + 50, 100, vnodes_after);
+	ck_assert(is_subset(vnodes, nr_vnodes, vnodes_after,
+			    nr_vnodes_after, vnode_cmp));
+}
+END_TEST
+
+static void gen_data_from_nodes(double *data, int idx)
+{
+	struct sd_node nodes[DATA_SIZE];
+	struct sd_vnode vnodes[DATA_SIZE];
+	int nr_nodes;
+	int nr_vnodes;
+
+	nr_nodes = gen_nodes(nodes, idx);
+	nr_vnodes = nodes_to_vnodes(nodes, nr_nodes, vnodes);
+	ck_assert_int_eq(nr_vnodes, DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE; i++)
+		data[i] = vnodes[i].id;
+}
+
+START_TEST(test_nodes_dispersion)
+{
+	chi2_test(gen_data_from_nodes);
+}
+END_TEST
+
+static size_t (*gen_disks)(struct disk *disks, int idx);
+
+/* generate one disk who has many virtual disks */
+static size_t gen_many_vdisks(struct disk *disks, int idx)
+{
+	memset(disks, 0, sizeof(*disks));
+
+	snprintf(disks[0].path, sizeof(disks[0].path), "/%x", idx);
+	disks[0].nr_vdisks = DATA_SIZE;
+
+	return 1;
+}
+
+static void disk1_setup(void)
+{
+	gen_disks = gen_many_vdisks;
+}
+
+/* generate many disk who have only one virtual disk */
+static size_t gen_many_disks_one_vdisk(struct disk *disks, int idx)
+{
+	memset(disks, 0, sizeof(*disks) * DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE; i++) {
+		snprintf(disks[i].path, sizeof(disks[i].path),
+			 "/%x/%x", idx, i);
+		disks[i].nr_vdisks = 1;
+	}
+
+	return DATA_SIZE;
+}
+
+static void disk2_setup(void)
+{
+	gen_disks = gen_many_disks_one_vdisk;
+}
+
+/* generate many disk who have some virtual disks */
+static size_t gen_many_disks_some_vdisks(struct disk *disks, int idx)
+{
+	memset(disks, 0, sizeof(*disks) * DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE / 4; i++) {
+		snprintf(disks[i].path, sizeof(disks[i].path),
+			 "/%x/%x", idx, i);
+		disks[i].nr_vdisks = 4;
+	}
+
+	return DATA_SIZE / 4;
+}
+
+static void disk3_setup(void)
+{
+	gen_disks = gen_many_disks_some_vdisks;
+}
+
+START_TEST(test_disks_update)
+{
+	size_t nr_vdisks;
+	size_t nr_vdisks_after;
+	struct disk disks[DATA_SIZE];
+	struct vdisk vdisks[MD_MAX_VDISK];
+	struct vdisk vdisks_after[MD_MAX_VDISK];
+
+	gen_disks(disks, 0);
+
+	nr_vdisks = disks_to_vdisks(disks, 1, vdisks);
+	/* add 1 disk */
+	nr_vdisks_after = disks_to_vdisks(disks, 2, vdisks_after);
+	ck_assert(is_subset(vdisks_after, nr_vdisks_after, vdisks,
+			    nr_vdisks, vdisk_cmp));
+
+	nr_vdisks = disks_to_vdisks(disks, 30, vdisks);
+	/* add 1 disk */
+	nr_vdisks_after = disks_to_vdisks(disks, 31, vdisks_after);
+	ck_assert(is_subset(vdisks_after, nr_vdisks_after, vdisks,
+			    nr_vdisks, vdisk_cmp));
+	/* add 20 disks */
+	nr_vdisks_after = disks_to_vdisks(disks, 50, vdisks_after);
+	ck_assert(is_subset(vdisks_after, nr_vdisks_after, vdisks,
+			    nr_vdisks, vdisk_cmp));
+
+	nr_vdisks = disks_to_vdisks(disks, 2, vdisks);
+	/* remove 1 disk */
+	nr_vdisks_after = disks_to_vdisks(disks + 1, 1, vdisks_after);
+	ck_assert(is_subset(vdisks, nr_vdisks, vdisks_after,
+			    nr_vdisks_after, vdisk_cmp));
+
+	nr_vdisks = disks_to_vdisks(disks, 50, vdisks);
+	/* remove 1 disk */
+	nr_vdisks_after = disks_to_vdisks(disks + 1, 49, vdisks_after);
+	ck_assert(is_subset(vdisks, nr_vdisks, vdisks_after,
+			    nr_vdisks_after, vdisk_cmp));
+	/* remove 20 disks */
+	nr_vdisks_after = disks_to_vdisks(disks + 10, 30, vdisks_after);
+	ck_assert(is_subset(vdisks, nr_vdisks, vdisks_after,
+			    nr_vdisks_after, vdisk_cmp));
+}
+END_TEST
+
+static void gen_data_from_disks(double *data, int idx)
+{
+	struct disk disks[DATA_SIZE];
+	struct vdisk vdisks[DATA_SIZE];
+	int nr_disks;
+	int nr_vdisks;
+
+	nr_disks = gen_disks(disks, idx);
+	nr_vdisks = disks_to_vdisks(disks, nr_disks, vdisks);
+	ck_assert_int_eq(nr_vdisks, DATA_SIZE);
+
+	for (int i = 0; i < DATA_SIZE; i++)
+		data[i] = vdisks[i].id;
+}
+
+START_TEST(test_disks_dispersion)
+{
+	chi2_test(gen_data_from_disks);
+}
+END_TEST
+
+static void (*gen_objects)(uint64_t *objects, int idx);
+
+/* generate one vdi with many data objects */
+static void gen_data_objects(uint64_t *objects, int idx)
+{
+	for (int i = 0; i < DATA_SIZE; i++) {
+		uint64_t oid = vid_to_data_oid(idx, i);
+		objects[i] = sd_hash_oid(oid);
+	}
+}
+
+static void object1_setup(void)
+{
+	gen_objects = gen_data_objects;
+}
+
+/* generate many vdi objects */
+static void gen_vdi_objects(uint64_t *objects, int idx)
+{
+	for (int i = 0; i < DATA_SIZE; i++) {
+		uint64_t oid = vid_to_data_oid(idx * DATA_SIZE + i, 0);
+		objects[i] = sd_hash_oid(oid);
+	}
+}
+
+static void object2_setup(void)
+{
+	gen_objects = gen_vdi_objects;
+}
+
+static void gen_data_from_objects(double *data, int idx)
+{
+	uint64_t objects[DATA_SIZE];
+
+	gen_objects(objects, idx);
+
+	for (int i = 0; i < DATA_SIZE; i++)
+		data[i] = objects[i];
+}
+
+START_TEST(test_objects_dispersion)
+{
+	chi2_test(gen_data_from_objects);
+}
+END_TEST
+
+static Suite *test_suite(void)
+{
+	Suite *s = suite_create("test hash");
+
+	TCase *tc_basic1 = tcase_create("sd_hash");
+	TCase *tc_basic2 = tcase_create("sd_hash_next");
+	TCase *tc_nodes1 = tcase_create("many vnodes");
+	TCase *tc_nodes2 = tcase_create("many daemons with one vnode");
+	TCase *tc_nodes3 = tcase_create("many daemons with some vnodes");
+	TCase *tc_nodes4 = tcase_create("many nodes with one vnode");
+	TCase *tc_nodes5 = tcase_create("many nodes with some vnodes");
+	TCase *tc_disks1 = tcase_create("many vdisks on one disk");
+	TCase *tc_disks2 = tcase_create("many disks with one vnode");
+	TCase *tc_disks3 = tcase_create("many disks with some vnodes");
+	TCase *tc_objects1 = tcase_create("many data objects");
+	TCase *tc_objects2 = tcase_create("many vdi objects");
+
+	tcase_add_checked_fixture(tc_basic1, basic1_setup, NULL);
+	tcase_add_checked_fixture(tc_basic2, basic2_setup, NULL);
+	tcase_add_checked_fixture(tc_nodes1, node1_setup, NULL);
+	tcase_add_checked_fixture(tc_nodes2, node2_setup, NULL);
+	tcase_add_checked_fixture(tc_nodes3, node3_setup, NULL);
+	tcase_add_checked_fixture(tc_nodes4, node4_setup, NULL);
+	tcase_add_checked_fixture(tc_nodes5, node5_setup, NULL);
+	tcase_add_checked_fixture(tc_disks1, disk1_setup, NULL);
+	tcase_add_checked_fixture(tc_disks2, disk2_setup, NULL);
+	tcase_add_checked_fixture(tc_disks3, disk3_setup, NULL);
+	tcase_add_checked_fixture(tc_objects1, object1_setup, NULL);
+	tcase_add_checked_fixture(tc_objects2, object2_setup, NULL);
+
+	tcase_add_test(tc_basic1, test_basic_dispersion);
+	tcase_add_test(tc_basic2, test_basic_dispersion);
+	tcase_add_test(tc_nodes1, test_nodes_dispersion);
+	tcase_add_test(tc_nodes2, test_nodes_update);
+	tcase_add_test(tc_nodes2, test_nodes_dispersion);
+	tcase_add_test(tc_nodes3, test_nodes_update);
+	tcase_add_test(tc_nodes3, test_nodes_dispersion);
+	tcase_add_test(tc_nodes4, test_nodes_update);
+	tcase_add_test(tc_nodes4, test_nodes_dispersion);
+	tcase_add_test(tc_nodes5, test_nodes_update);
+	tcase_add_test(tc_nodes5, test_nodes_dispersion);
+	tcase_add_test(tc_disks1, test_disks_dispersion);
+	tcase_add_test(tc_disks2, test_disks_update);
+	tcase_add_test(tc_disks2, test_disks_dispersion);
+	tcase_add_test(tc_disks3, test_disks_update);
+	tcase_add_test(tc_disks3, test_disks_dispersion);
+	tcase_add_test(tc_objects1, test_objects_dispersion);
+	tcase_add_test(tc_objects2, test_objects_dispersion);
+
+	suite_add_tcase(s, tc_basic1);
+	suite_add_tcase(s, tc_basic2);
+	suite_add_tcase(s, tc_nodes1);
+	suite_add_tcase(s, tc_nodes2);
+	suite_add_tcase(s, tc_nodes3);
+	suite_add_tcase(s, tc_disks1);
+	suite_add_tcase(s, tc_disks2);
+	suite_add_tcase(s, tc_objects1);
+	suite_add_tcase(s, tc_objects2);
+
+	return s;
+}
+
+int main(void)
+{
+	int number_failed;
+	Suite *s = test_suite();
+	SRunner *sr = srunner_create(s);
+	srunner_run_all(sr, CK_NORMAL);
+	number_failed = srunner_ntests_failed(sr);
+	srunner_free(sr);
+	return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
-- 
1.7.9.5




More information about the sheepdog mailing list