[Sheepdog] [PATCH] sheep: add comments about object recovery

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Mon Mar 21 06:58:35 CET 2011


Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/store.c |   49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 49 insertions(+), 0 deletions(-)

diff --git a/sheep/store.c b/sheep/store.c
index 8942d46..bca5846 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1058,6 +1058,13 @@ static int node_distance(int my, int her, int nr)
 	return (my + nr - her) % nr;
 }
 
+/*
+ * contains_node - checks that the node id is included in the target nodes
+ *
+ * The target nodes to store replicated objects are the first N nodes
+ * from the base_idx'th on the consistent hash ring, where N is the
+ * number of copies of objects.
+ */
 static int contains_node(uint64_t id, struct sheepdog_node_list_entry *entry,
 			 int nr, int base_idx)
 {
@@ -1087,6 +1094,38 @@ static LIST_HEAD(recovery_work_list);
 static struct recovery_work *recovering_work;
 static uint64_t blocking_oid;
 
+/*
+ * find_tgt_node - find the node from which we should recover objects
+ *
+ * This function compares two node lists, the current target nodes and
+ * the previous target nodes, and finds the node from the previous
+ * target nodes which corresponds to the copy_idx'th node of the
+ * current target nodes.  The correspondence is injective and
+ * maximizes the number of nodes which can recover objects locally.
+ *
+ * For example, consider the number of redundancy is 5, the consistent
+ * hash ring is {A, B, C, D, E, F}, and the node G is newly added.
+ * The parameters of this function are
+ *   old_entry = {A, B, C, D, E, F},    old_nr = 6, old_idx = 3
+ *   cur_entry = {A, B, C, D, E, F, G}, cur_nr = 7, cur_idx = 3
+ *
+ * In this case:
+ *   the previous target nodes: {D, E, F, A, B}
+ *     (the first 5 nodes from the 3rd node on the previous hash ring)
+ *   the current target nodes : {D, E, F, G, A}
+ *     (the first 5 nodes from the 3rd node on the current hash ring)
+ *
+ * The correspondence between copy_idx and return value are as follows:
+ * ----------------------------
+ * copy_idx       0  1  2  3  4
+ * src_node       D  E  F  G  A
+ * tgt_node       D  E  F  B  A
+ * return value   0  1  2  4  3
+ * ----------------------------
+ *
+ * The node D, E, F, and A can recover objects from local, and the
+ * node G recovers from the node B.
+ */
 static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int old_nr, int old_idx,
 			 struct sheepdog_node_list_entry *cur_entry, int cur_nr, int cur_idx,
 			 int copy_idx)
@@ -1096,6 +1135,7 @@ static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int old_nr,
 	dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", old_idx, old_nr, cur_idx, cur_nr, copy_idx);
 
 	if (copy_idx < cur_nr) {
+		/* If the same node is in the previous target nodes, return its index */
 		idx = contains_node(cur_entry[(cur_idx + copy_idx) % cur_nr].id,
 				    old_entry, old_nr, old_idx);
 		if (idx >= 0) {
@@ -1106,16 +1146,24 @@ static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int old_nr,
 
 	for (i = 0, j = 0; ; i++, j++) {
 		if (i < cur_nr) {
+			/* Skip if the node can recover from its local */
 			idx = contains_node(cur_entry[(cur_idx + i) % cur_nr].id,
 					    old_entry, old_nr, old_idx);
 			if (idx >= 0)
 				continue;
 
+			/* Find the next target which needs to recover from remote */
 			while (contains_node(old_entry[(old_idx + j) % old_nr].id,
 					     cur_entry, cur_nr, cur_idx) >= 0 && j < old_nr)
 				j++;
 		}
 		if (j == old_nr) {
+			/*
+			 * Cannot find the target because the number of nodes
+			 * is smaller than the number of copies.  We can select
+			 * any node in this case, so select the first one.
+			 */
+
 			/* old_nr should be smaller than sys->nr_sobjs */
 			if (old_nr >= sys->nr_sobjs)
 				eprintf("bug: %"PRIu32", %"PRIu32"\n", old_nr, sys->nr_sobjs);
@@ -1124,6 +1172,7 @@ static int find_tgt_node(struct sheepdog_node_list_entry *old_entry, int old_nr,
 		}
 
 		if (i == copy_idx) {
+			/* Found the target node correspoinding to copy_idx */
 			dprintf("%"PRIu32", %"PRIu32", %"PRIu32"\n", (old_idx + j) % old_nr, copy_idx,
 				(cur_idx + i) % cur_nr);
 			return (old_idx + j) % old_nr;
-- 
1.7.1




More information about the sheepdog mailing list