[Sheepdog] panic in get_nth_node

huxinwei huxinwei at huawei.com
Wed Feb 29 09:30:23 CET 2012


Hi list:

  In my environment (2 sheep only), sheep always panics during recovery when a node that previously left the cluster comes back.

It turns out to be intended behavior in get_nth_node:

=========================================
        if (idx == base) {
                panic("bug"); /* not found */
=========================================

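  While I agree this is correct in most scenarios, it seems too intrusive while recovering in my trivial test.
To be specific, find_tgt_node calls get_nth_node, and that lookup can fail to find a node under certain conditions during recovery; the patch below lets that one call return -1 instead of panicking.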

  I don't have a lot of faith in my own workaround either. Let me know what you think ;)

  Thanks.

diff --git a/include/sheep.h b/include/sheep.h
index e435b63..77516a8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -180,7 +180,7 @@ static inline int same_zone(struct sd_vnode *e, int n1, int n2)
 
 /* traverse the virtual node list and return the n'th one */
 static inline int get_nth_node(struct sd_vnode *entries,
-			       int nr_entries, int base, int n)
+			       int nr_entries, int base, int n, int hard)
 {
 	int nodes[SD_MAX_REDUNDANCY];
 	int nr = 0, idx = base, i;
@@ -190,7 +190,12 @@ static inline int get_nth_node(struct sd_vnode *entries,
 next:
 		idx = (idx + 1) % nr_entries;
 		if (idx == base) {
-			panic("bug"); /* not found */
+			if (hard == 1)
+				panic("bug"); /* not found */
+			else
+				//FIXME: we know it may fail with find_tgt_node
+				//when search for previous target node in certain conditions
+				return -1;
 		}
 		for (i = 0; i < nr; i++) {
 			if (same_node(entries, idx, nodes[i]))
@@ -216,7 +221,7 @@ static inline int hval_to_sheep(struct sd_vnode *entries,
 		if (id > e->id && id <= n->id)
 			break;
 	}
-	return get_nth_node(entries, nr_entries, (i + 1) % nr_entries, idx);
+	return get_nth_node(entries, nr_entries, (i + 1) % nr_entries, idx, 1);
 }
 
 static inline int obj_to_sheep(struct sd_vnode *entries,
diff --git a/sheep/store.c b/sheep/store.c
index 256feae..3076fca 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1028,7 +1028,7 @@ static int contains_node(struct sd_vnode *key,
 	int i;
 
 	for (i = 0; i < copies; i++) {
-		int idx = get_nth_node(entry, nr, base_idx, i);
+		int idx = get_nth_node(entry, nr, base_idx, i, 1);
 		if (memcmp(key->addr, entry[idx].addr, sizeof(key->addr)) == 0
 		    && key->port == entry[idx].port)
 			return idx;
@@ -1107,29 +1107,35 @@ static int find_tgt_node(struct sd_vnode *old_entry,
 			 int copy_idx)
 {
 	int i, j, idx;
+	int e = get_nth_node(cur_entry, cur_nr, cur_idx, copy_idx, 0);
 
 	dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n",
 		old_idx, old_nr, old_copies, cur_idx, cur_nr, cur_copies, copy_idx);
 
 	/* If the same node is in the previous target nodes, return its index */
-	idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, cur_idx, copy_idx),
-			    old_entry, old_nr, old_idx, old_copies);
-	if (idx >= 0) {
-		dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", idx, copy_idx, cur_idx, cur_nr);
-		return idx;
+	if (e != -1) {
+		idx = contains_node(cur_entry + e,
+				old_entry, old_nr, old_idx, old_copies);
+		if (idx >= 0) {
+			dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", idx, copy_idx, cur_idx, cur_nr);
+			return idx;
+		}
 	}
 
+	dprintf("it's not the same node as previous target\n");
+
 	for (i = 0, j = 0; ; i++, j++) {
 		if (i < copy_idx) {
 			/* Skip if the node can recover from its local */
-			idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, cur_idx, i),
+			idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, cur_idx, i, 1),
 					    old_entry, old_nr, old_idx, old_copies);
 			if (idx >= 0)
 				continue;
 
+			dprintf("it cannot recover from its local\n");
 			/* Find the next target which needs to recover from remote */
 			while (j < old_copies &&
-			       contains_node(old_entry + get_nth_node(old_entry, old_nr, old_idx, j),
+			       contains_node(old_entry + get_nth_node(old_entry, old_nr, old_idx, j, 1),
 					     cur_entry, cur_nr, cur_idx, cur_copies) >= 0)
 				j++;
 		}
@@ -1145,9 +1151,9 @@ static int find_tgt_node(struct sd_vnode *old_entry,
 		if (i == copy_idx) {
 			/* Found the target node correspoinding to copy_idx */
 			dprintf("%"PRIu32", %"PRIu32", %"PRIu32"\n",
-				get_nth_node(old_entry, old_nr, old_idx, j),
+				get_nth_node(old_entry, old_nr, old_idx, j, 1),
 				copy_idx, (cur_idx + i) % cur_nr);
-			return get_nth_node(old_entry, old_nr, old_idx, j);
+			return get_nth_node(old_entry, old_nr, old_idx, j, 1);
 		}
 
 	}



More information about the sheepdog mailing list