[sheepdog] [PATCH 2/2] sheep: handle a case that only the node itself has an inode object during checkpoint sync
Hitoshi Mitake
mitake.hitoshi at lab.ntt.co.jp
Tue Jun 23 15:50:38 CEST 2015
This patch lets sheep handle the case where only the node itself has an
inode object during checkpoint sync. It is possible that the other
nodes don't have a checkpoint entry for the required VID while the
node itself has the inode object, e.g. with copy=1 (see test case 074).
In such a case, the vdi state doesn't need to be constructed during
checkpoint syncing.
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
include/internal_proto.h | 3 ++-
sheep/group.c | 36 ++++++++++++++++++++++++++++++++++--
sheep/vdi.c | 4 ++++
3 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/include/internal_proto.h b/include/internal_proto.h
index b3cbbc5..90421f4 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -149,7 +149,8 @@
#define SD_RES_COLLECTING_CINFO 0x95
#define SD_RES_GATEWAY_MODE 0x97 /* Target node is gateway mode */
#define SD_RES_INVALID_VNODES_STRATEGY 0x98 /* Invalid vnodes strategy */
-
+/* Node doesn't have a required entry of checkpoint */
+#define SD_RES_NO_CHECKPOINT_ENTRY 0x99
#define SD_CLUSTER_FLAG_STRICT 0x0001 /* Strict mode for write */
#define SD_CLUSTER_FLAG_DISKMODE 0x0002 /* Disk mode for cluster */
diff --git a/sheep/group.c b/sheep/group.c
index 936695f..f134f74 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -693,6 +693,8 @@ struct cinfo_collection_work {
struct vdi_state result;
uint32_t next_vid;
+
+ bool skip;
};
static struct cinfo_collection_work *collect_work;
@@ -711,12 +713,26 @@ static int do_cinfo_collection_work(uint32_t epoch, uint32_t vid,
return sheep_exec_req(&n->nid, &hdr, (char *)result);
}
+static bool check_inode_obj_exist(uint32_t vid, int epoch)
+{
+ struct siocb iocb;
+ char buf[1]; /* dummy: 1-byte target for the probe read below; only the read's status is used, never the data */
+
+ memset(&iocb, 0, sizeof(iocb));
+ iocb.epoch = epoch;
+ iocb.buf = &buf;
+ iocb.length = 1;
+ iocb.offset = 0;
+ iocb.ec_index = -1;
+ return SD_RES_SUCCESS == sd_store->read(vid_to_vdi_oid(vid), &iocb);
+}
+
static void cinfo_collection_work(struct work *work)
{
struct cinfo_collection_work *w =
container_of(work, struct cinfo_collection_work, work);
struct sd_node *n;
- int ret;
+ int ret, nr_nodes_no_chkpt = 0;
sd_debug("start collection of cinfo, epoch: %d, vid: %"PRIx32,
w->epoch, w->next_vid);
@@ -731,6 +747,21 @@ static void cinfo_collection_work(struct work *work)
&w->result);
if (ret == SD_RES_SUCCESS)
return;
+ else if (ret == SD_RES_NO_CHECKPOINT_ENTRY)
+ nr_nodes_no_chkpt++;
+ }
+
+ if (nr_nodes_no_chkpt + 1 == w->members->nr_nodes) {
+ sd_info("other nodes doesn't have a entry of checkpoint for"
+ " VID: %"PRIx32" at epoch %d", w->next_vid, w->epoch);
+
+ if (check_inode_obj_exist(w->next_vid, w->epoch)) {
+ w->skip = true;
+ return;
+ }
+
+ panic("this node should have object of inode: %"PRIx64
+ "but doesn't have", vid_to_vdi_oid(w->next_vid));
}
/*
@@ -787,7 +818,8 @@ static main_fn void cinfo_collection_done(struct work *work)
sd_debug("owner: %s",
addr_to_str(vs->lock_owner.addr, vs->lock_owner.port));
- apply_vdi_lock_state(vs);
+ if (!w->skip)
+ apply_vdi_lock_state(vs);
next_vid = find_next_bit(sys->vdi_inuse, SD_NR_VDIS, w->next_vid + 1);
if (next_vid == SD_NR_VDIS) {
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 0d4aeb1..f6e3a5f 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -2167,6 +2167,10 @@ main_fn int get_vdi_state_checkpoint(int epoch, uint32_t vid, void *data)
goto found;
}
}
+
+ sd_info("this node doesn't have a required entry of VID:"
+ " %"PRIx32" at epoch %d", vid, epoch);
+ return SD_RES_NO_CHECKPOINT_ENTRY;
}
}
--
1.9.1
More information about the sheepdog
mailing list