[Sheepdog] panic in get_nth_node
huxinwei
huxinwei at huawei.com
Wed Feb 29 09:30:23 CET 2012
Hi list:
In my environment (only 2 sheep), sheep always panics during recovery when a node that had left rejoins the cluster.
It turns out to be intended behavior in get_nth_node:
=========================================
if (idx == base) {
panic("bug"); /* not found */
=========================================
While I agree this is correct in most scenarios, it does seem too intrusive during recovery in my trivial test.
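To be specific, find_tgt_node calls get_nth_node while searching for the previous target node, and under certain conditions that search fails and triggers the panic.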
I don't have a lot of faith in my own workaround either. Let me know what you think ;)
Thanks.
diff --git a/include/sheep.h b/include/sheep.h
index e435b63..77516a8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -180,7 +180,7 @@ static inline int same_zone(struct sd_vnode *e, int n1, int n2)
/* traverse the virtual node list and return the n'th one */
static inline int get_nth_node(struct sd_vnode *entries,
- int nr_entries, int base, int n)
+ int nr_entries, int base, int n, int hard)
{
int nodes[SD_MAX_REDUNDANCY];
int nr = 0, idx = base, i;
@@ -190,7 +190,12 @@ static inline int get_nth_node(struct sd_vnode *entries,
next:
idx = (idx + 1) % nr_entries;
if (idx == base) {
- panic("bug"); /* not found */
+ if (hard == 1)
+ panic("bug"); /* not found */
+ else
+ //FIXME: we know it may fail with find_tgt_node
+ //when search for previous target node in certain conditions
+ return -1;
}
for (i = 0; i < nr; i++) {
if (same_node(entries, idx, nodes[i]))
@@ -216,7 +221,7 @@ static inline int hval_to_sheep(struct sd_vnode *entries,
if (id > e->id && id <= n->id)
break;
}
- return get_nth_node(entries, nr_entries, (i + 1) % nr_entries, idx);
+ return get_nth_node(entries, nr_entries, (i + 1) % nr_entries, idx, 1);
}
static inline int obj_to_sheep(struct sd_vnode *entries,
diff --git a/sheep/store.c b/sheep/store.c
index 256feae..3076fca 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1028,7 +1028,7 @@ static int contains_node(struct sd_vnode *key,
int i;
for (i = 0; i < copies; i++) {
- int idx = get_nth_node(entry, nr, base_idx, i);
+ int idx = get_nth_node(entry, nr, base_idx, i, 1);
if (memcmp(key->addr, entry[idx].addr, sizeof(key->addr)) == 0
&& key->port == entry[idx].port)
return idx;
@@ -1107,29 +1107,35 @@ static int find_tgt_node(struct sd_vnode *old_entry,
int copy_idx)
{
int i, j, idx;
+ int e = get_nth_node(cur_entry, cur_nr, cur_idx, copy_idx, 0);
dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n",
old_idx, old_nr, old_copies, cur_idx, cur_nr, cur_copies, copy_idx);
/* If the same node is in the previous target nodes, return its index */
- idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, cur_idx, copy_idx),
- old_entry, old_nr, old_idx, old_copies);
- if (idx >= 0) {
- dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", idx, copy_idx, cur_idx, cur_nr);
- return idx;
+ if (e != -1) {
+ idx = contains_node(cur_entry + e,
+ old_entry, old_nr, old_idx, old_copies);
+ if (idx >= 0) {
+ dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", idx, copy_idx, cur_idx, cur_nr);
+ return idx;
+ }
}
+ dprintf("it's not the same node as previous target\n");
+
for (i = 0, j = 0; ; i++, j++) {
if (i < copy_idx) {
/* Skip if the node can recover from its local */
- idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, cur_idx, i),
+ idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, cur_idx, i, 1),
old_entry, old_nr, old_idx, old_copies);
if (idx >= 0)
continue;
+ dprintf("it cannot recover from its local\n");
/* Find the next target which needs to recover from remote */
while (j < old_copies &&
- contains_node(old_entry + get_nth_node(old_entry, old_nr, old_idx, j),
+ contains_node(old_entry + get_nth_node(old_entry, old_nr, old_idx, j, 1),
cur_entry, cur_nr, cur_idx, cur_copies) >= 0)
j++;
}
@@ -1145,9 +1151,9 @@ static int find_tgt_node(struct sd_vnode *old_entry,
if (i == copy_idx) {
/* Found the target node correspoinding to copy_idx */
dprintf("%"PRIu32", %"PRIu32", %"PRIu32"\n",
- get_nth_node(old_entry, old_nr, old_idx, j),
+ get_nth_node(old_entry, old_nr, old_idx, j, 1),
copy_idx, (cur_idx + i) % cur_nr);
- return get_nth_node(old_entry, old_nr, old_idx, j);
+ return get_nth_node(old_entry, old_nr, old_idx, j, 1);
}
}
More information about the sheepdog
mailing list