[sheepdog] [PATCH 3/9] sheep: move objects to stale directory before starting recovery

MORITA Kazutaka morita.kazutaka at gmail.com
Mon May 6 19:45:50 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

Currently, if Sheepdog crashes before finishing recovery, stale objects can
remain in the working directory.  This is confusing when we have to recover
Sheepdog manually after the cluster has gone completely wrong, because we
cannot tell which objects are the latest ones.

This patch moves the stale objects to the .stale directory before starting
object recovery and adds the correct suffix, which indicates when they became
stale.  After this change, we can guarantee that

 - All the objects in the working directory are the latest ones.
 - If a stale object has a newer suffix, it holds newer data.  For example,
   if node A has 0000.1 and node B has 0000.2 in their stale directories,
   0000.2 is the newer object.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/farm/farm.c   |    2 +-
 sheep/plain_store.c |    7 ++++---
 sheep/recovery.c    |   10 +++++++---
 sheep/sheep_priv.h  |    7 ++-----
 4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index 2e85ed6..cca8bbd 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -258,7 +258,7 @@ static struct store_driver farm = {
 	.write = default_write,
 	.read = default_read,
 	.link = default_link,
-	.end_recover = default_end_recover,
+	.update_epoch = default_update_epoch,
 	.snapshot = farm_snapshot,
 	.cleanup = default_cleanup,
 	.restore = farm_restore,
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 640f769..7e8eb9c 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -432,9 +432,10 @@ static int check_stale_objects(uint64_t oid, char *wd, void *arg)
 	return SD_RES_SUCCESS;
 }
 
-int default_end_recover(uint32_t old_epoch,
-			const struct vnode_info *old_vnode_info)
+int default_update_epoch(uint32_t epoch)
 {
+	uint32_t old_epoch = epoch - 1;
+
 	if (old_epoch == 0)
 		return SD_RES_SUCCESS;
 
@@ -488,7 +489,7 @@ static struct store_driver plain_store = {
 	.write = default_write,
 	.read = default_read,
 	.link = default_link,
-	.end_recover = default_end_recover,
+	.update_epoch = default_update_epoch,
 	.cleanup = default_cleanup,
 	.format = default_format,
 	.remove_object = default_remove_object,
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 688058a..919f597 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -325,6 +325,10 @@ static inline bool run_next_rw(struct recovery_work *rw)
 		return false;
 
 	free_recovery_work(rw);
+
+	if (sd_store->update_epoch)
+		sd_store->update_epoch(nrw->epoch);
+
 	thread_unsafe_set(recovering_work, nrw);
 	wakeup_all_requests();
 	queue_work(sys->recovery_wqueue, &nrw->work);
@@ -362,9 +366,6 @@ static inline void finish_recovery(struct recovery_work *rw)
 	uint32_t recovered_epoch = rw->epoch;
 	thread_unsafe_set(recovering_work, NULL);
 
-	if (sd_store->end_recover)
-		sd_store->end_recover(sys->epoch - 1, rw->old_vinfo);
-
 	wakeup_all_requests();
 
 	/* notify recovery completion to other nodes */
@@ -663,6 +664,9 @@ int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo)
 	rw->work.fn = prepare_object_list;
 	rw->work.done = finish_object_list;
 
+	if (sd_store->update_epoch)
+		sd_store->update_epoch(rw->epoch);
+
 	if (thread_unsafe_get(recovering_work) != NULL) {
 		/* skip the previous epoch recovery */
 		struct recovery_work *nrw = uatomic_xchg_ptr(&next_rw, rw);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index d01d408..09bc477 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -169,9 +169,7 @@ struct store_driver {
 	int (*remove_object)(uint64_t oid);
 	/* Operations in recovery */
 	int (*link)(uint64_t oid, uint32_t tgt_epoch);
-	/* int (*begin_recover)(const struct siocb *); */
-	int (*end_recover)(uint32_t epoch,
-			   const struct vnode_info *old_vnode_info);
+	int (*update_epoch)(uint32_t epoch);
 	int (*purge_obj)(void);
 	/* Operations for snapshot */
 	int (*snapshot)(const struct siocb *);
@@ -186,8 +184,7 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb);
 int default_write(uint64_t oid, const struct siocb *iocb);
 int default_read(uint64_t oid, const struct siocb *iocb);
 int default_link(uint64_t oid, uint32_t tgt_epoch);
-int default_end_recover(uint32_t old_epoch,
-			const struct vnode_info *old_vnode_info);
+int default_update_epoch(uint32_t epoch);
 int default_cleanup(void);
 int default_format(void);
 int default_remove_object(uint64_t oid);
-- 
1.7.9.5




More information about the sheepdog mailing list