[sheepdog] [PATCH 9/9] sheep: show error message when object may be lost

MORITA Kazutaka morita.kazutaka at gmail.com
Mon May 6 19:45:56 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

From the point of view of block storage semantics, sheepdog must not show
old data to clients.  If sheep may have broken data consistency, it should
raise an alert so that users are made aware of the problem.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/recovery.c |   34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/sheep/recovery.c b/sheep/recovery.c
index 8962742..6a67fba 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -175,6 +175,7 @@ static int do_recover_object(struct recovery_obj_work *row)
 	uint64_t oid = row->oid;
 	uint32_t epoch = rw->epoch, tgt_epoch = rw->epoch;
 	int nr_copies, ret, i, start = 0;
+	bool data_lost;
 
 	old = grab_vnode_info(rw->old_vinfo);
 
@@ -195,6 +196,7 @@ again:
 		}
 	}
 
+	data_lost = true;
 	/* Let's do a breadth-first search */
 	for (i = 0; i < nr_copies; i++) {
 		const struct sd_vnode *tgt_vnode;
@@ -207,20 +209,37 @@ again:
 			continue;
 		ret = recover_object_from_replica(oid, tgt_vnode,
 						  epoch, tgt_epoch);
-		if (ret == SD_RES_SUCCESS) {
+		if (ret == SD_RES_SUCCESS)
 			/* Succeed */
 			break;
-		} else if (SD_RES_OLD_NODE_VER == ret) {
+
+		switch (ret) {
+		case SD_RES_OLD_NODE_VER:
 			row->stop = true;
 			goto err;
-		} else
+		case SD_RES_NO_OBJ:
+			/*
+			 * No object means that there was no write success at
+			 * this epoch.
+			 */
+			data_lost = false;
+			/* fall through */
+		default:
 			ret = -1;
+			break;
+		}
+
 	}
 
 	/* No luck, roll back to an older configuration and try again */
 	if (ret < 0) {
 		struct vnode_info *new_old;
 
+		if (data_lost) {
+			sd_printf(SDOG_ALERT, "cannot access any replicas of "
+				  "%"PRIx64" at epoch %d", oid, tgt_epoch);
+			sd_printf(SDOG_ALERT, "clients may see old data");
+		}
 rollback:
 		tgt_epoch--;
 		if (tgt_epoch < 1) {
@@ -230,9 +249,12 @@ rollback:
 		}
 
 		new_old = get_vnode_info_epoch(tgt_epoch, rw->cur_vinfo);
-		if (!new_old)
+		if (!new_old) {
 			/* We rollback in case we don't get a valid epoch */
+			sd_printf(SDOG_ALERT, "cannot get epoch %d", tgt_epoch);
+			sd_printf(SDOG_ALERT, "clients may see old data");
 			goto rollback;
+		}
 
 		put_vnode_info(old);
 		old = new_old;
@@ -620,6 +642,10 @@ retry:
 		buf = xrealloc(buf, buf_size);
 		goto retry;
 	default:
+		sd_printf(SDOG_ALERT, "cannot get object list from %s:%d", name,
+			  e->nid.port);
+		sd_printf(SDOG_ALERT, "some objects may be not recovered at "
+			  "epoch %d", epoch);
 		free(buf);
 		return NULL;
 	}
-- 
1.7.9.5




More information about the sheepdog mailing list