[sheepdog] [PATCH 2/2] sheep: handle a case that only the node itself has an inode object during checkpoint sync

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Tue Jun 23 15:50:38 CEST 2015


This patch lets sheep handle the case where only the node itself has an
inode object during checkpoint sync. It is possible that the other nodes
don't have a checkpoint entry for the required VID while the node itself
has the inode object, e.g. when copy=1 (see test case 074).

In such a case, the vdi state doesn't need to be constructed during
checkpoint syncing.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 include/internal_proto.h |  3 ++-
 sheep/group.c            | 36 ++++++++++++++++++++++++++++++++++--
 sheep/vdi.c              |  4 ++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index b3cbbc5..90421f4 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -149,7 +149,8 @@
 #define SD_RES_COLLECTING_CINFO 0x95
 #define SD_RES_GATEWAY_MODE  0x97 /* Target node is gateway mode */
 #define SD_RES_INVALID_VNODES_STRATEGY 0x98 /* Invalid vnodes strategy */
-
+/* Node doesn't have a required entry of checkpoint */
+#define SD_RES_NO_CHECKPOINT_ENTRY 0x99
 
 #define SD_CLUSTER_FLAG_STRICT		0x0001 /* Strict mode for write */
 #define SD_CLUSTER_FLAG_DISKMODE	0x0002 /* Disk mode for cluster */
diff --git a/sheep/group.c b/sheep/group.c
index 936695f..f134f74 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -693,6 +693,8 @@ struct cinfo_collection_work {
 
 	struct vdi_state result;
 	uint32_t next_vid;
+
+	bool skip;
 };
 
 static struct cinfo_collection_work *collect_work;
@@ -711,12 +713,26 @@ static int do_cinfo_collection_work(uint32_t epoch, uint32_t vid,
 	return sheep_exec_req(&n->nid, &hdr, (char *)result);
 }
 
+static bool check_inode_obj_exist(uint32_t vid, int epoch)
+{
+	struct siocb iocb;
+	char buf[1];		/* dummy */
+
+	memset(&iocb, 0, sizeof(iocb));
+	iocb.epoch = epoch;
+	iocb.buf = &buf;
+	iocb.length = 1;
+	iocb.offset = 0;
+	iocb.ec_index = -1;
+	return SD_RES_SUCCESS == sd_store->read(vid_to_vdi_oid(vid), &iocb);
+}
+
 static void cinfo_collection_work(struct work *work)
 {
 	struct cinfo_collection_work *w =
 		container_of(work, struct cinfo_collection_work, work);
 	struct sd_node *n;
-	int ret;
+	int ret, nr_nodes_no_chkpt = 0;
 
 	sd_debug("start collection of cinfo, epoch: %d, vid: %"PRIx32,
 		 w->epoch, w->next_vid);
@@ -731,6 +747,21 @@ static void cinfo_collection_work(struct work *work)
 					       &w->result);
 		if (ret == SD_RES_SUCCESS)
 			return;
+		else if (ret == SD_RES_NO_CHECKPOINT_ENTRY)
+			nr_nodes_no_chkpt++;
+	}
+
+	if (nr_nodes_no_chkpt + 1 == w->members->nr_nodes) {
+		sd_info("other nodes doesn't have a entry of checkpoint for"
+			" VID: %"PRIx32" at epoch %d", w->next_vid, w->epoch);
+
+		if (check_inode_obj_exist(w->next_vid, w->epoch)) {
+			w->skip = true;
+			return;
+		}
+
+		panic("this node should have object of inode: %"PRIx64
+		      "but doesn't have", vid_to_vdi_oid(w->next_vid));
 	}
 
 	/*
@@ -787,7 +818,8 @@ static main_fn void cinfo_collection_done(struct work *work)
 	sd_debug("owner: %s",
 		 addr_to_str(vs->lock_owner.addr, vs->lock_owner.port));
 
-	apply_vdi_lock_state(vs);
+	if (!w->skip)
+		apply_vdi_lock_state(vs);
 
 	next_vid = find_next_bit(sys->vdi_inuse, SD_NR_VDIS, w->next_vid + 1);
 	if (next_vid == SD_NR_VDIS) {
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 0d4aeb1..f6e3a5f 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -2167,6 +2167,10 @@ main_fn int get_vdi_state_checkpoint(int epoch, uint32_t vid, void *data)
 					goto found;
 				}
 			}
+
+			sd_info("this node doesn't have a required entry of VID:"
+				" %"PRIx32" at epoch %d", vid, epoch);
+			return SD_RES_NO_CHECKPOINT_ENTRY;
 		}
 	}
 
-- 
1.9.1



More information about the sheepdog mailing list