[sheepdog] [PATCH 3/4] sheep: rollback vnode info which is younger than last gathered epoch for ec

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Mon Oct 20 09:07:13 CEST 2014


For erasure coding, sheep needs to roll back the vnode info history
beyond the last gathered epoch, because erasure-coded VDIs require
recovery from the stale directory when the number of nodes is smaller
than the number of data stripes.

Related bug:
https://bugs.launchpad.net/sheepdog-project/+bug/1367612

Reported-by: Valerio Pachera <sirio81 at gmail.com>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 sheep/recovery.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/sheep/recovery.c b/sheep/recovery.c
index c497ae2..f5605ac 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -99,7 +99,8 @@ static inline bool node_is_gateway_only(void)
 
 static struct vnode_info *rollback_vnode_info(uint32_t *epoch,
 					      struct recovery_info *rinfo,
-					      struct vnode_info *cur)
+					      struct vnode_info *cur,
+					      bool ec)
 {
 	struct sd_node nodes[SD_MAX_NODES];
 	int nr_nodes;
@@ -107,7 +108,7 @@ static struct vnode_info *rollback_vnode_info(uint32_t *epoch,
 
 rollback:
 	*epoch -= 1;
-	if (*epoch < last_gathered_epoch)
+	if (!ec && *epoch < last_gathered_epoch)
 		return NULL;
 
 	nr_nodes = get_nodes_epoch(*epoch, cur, nodes, sizeof(nodes));
@@ -115,6 +116,10 @@ rollback:
 		/* We rollback in case we don't get a valid epoch */
 		sd_alert("cannot get epoch %d", *epoch);
 		sd_alert("clients may see old data");
+
+		if (!*epoch)
+			return NULL;
+
 		goto rollback;
 	}
 	/* double check */
@@ -220,7 +225,7 @@ again:
 	default:
 rollback:
 		new_old = rollback_vnode_info(&tgt_epoch, rw->rinfo,
-					      rw->cur_vinfo);
+					      rw->cur_vinfo, true);
 		if (!new_old) {
 			sd_err("can not read %"PRIx64" idx %d", oid, idx);
 			free(buf);
@@ -404,7 +409,7 @@ again:
 	default:
 		/* No luck, roll back to an older configuration and try again */
 		new_old = rollback_vnode_info(&tgt_epoch, rw->rinfo,
-					      rw->cur_vinfo);
+					      rw->cur_vinfo, false);
 		if (!new_old) {
 			sd_err("can not recover oid %"PRIx64, oid);
 			ret = -1;
-- 
1.8.3.2




More information about the sheepdog mailing list