[sheepdog] [PATCH 9/9] sheep: log and replay lock/unlock information on a newly joining node
Hitoshi Mitake
mitake.hitoshi at lab.ntt.co.jp
Fri Jun 27 08:13:56 CEST 2014
We need to handle a case like the one below:

1. a new sheep joins the cluster
2. before the new sheep finishes copying the snapshot of the vdi
state, a client issues a lock request
3. the new sheep finishes copying the state

If this execution pattern happens, the lock information produced in
step 2 cannot be obtained by the new sheep. This patch solves the
problem.
Brief description of the solution:

When a sheep joins the cluster, it sets its status to
SD_STATUS_COLLECTING_CINFO. While it is in this state, it logs every
lock/unlock request it receives in an internal queue. After copying
the snapshot (step 3), the sheep replays the log and constructs the
complete lock state.
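
For illustration, here is a minimal standalone sketch of this
log-and-replay pattern. It is a simplification, not the patch itself:
a hand-rolled singly linked list stands in for sheep's list.h helpers,
printf() stands in for apply_vdi_lock_state(), and every name in it is
illustrative only.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for sheep's struct node_id */
struct node_id {
	char addr[16];
};

struct vdi_op_log {
	bool lock;		/* true: lock, false: unlock */
	uint32_t vid;
	struct node_id owner;
	struct vdi_op_log *next;
};

static struct vdi_op_log *ops_head, **ops_tail = &ops_head;
static bool collecting_cinfo = true;	/* SD_STATUS_COLLECTING_CINFO analogue */

/* stand-in for apply_vdi_lock_state(): apply one event to the lock table */
static void apply_op(bool lock, uint32_t vid, const struct node_id *owner)
{
	printf("%s VID %" PRIx32 " by %s\n",
	       lock ? "lock" : "unlock", vid, owner->addr);
}

/* handler: queue the event while collecting cinfo, apply it otherwise */
static void handle_op(bool lock, uint32_t vid, const struct node_id *owner)
{
	if (collecting_cinfo) {
		struct vdi_op_log *op = calloc(1, sizeof(*op));

		op->lock = lock;
		op->vid = vid;
		op->owner = *owner;
		*ops_tail = op;		/* append to keep arrival order */
		ops_tail = &op->next;
		return;
	}
	apply_op(lock, vid, owner);
}

/* once the snapshot copy is done, replay the queued events in order */
static void play_logged_ops(void)
{
	collecting_cinfo = false;
	for (struct vdi_op_log *op = ops_head, *next; op; op = next) {
		next = op->next;
		apply_op(op->lock, op->vid, &op->owner);
		free(op);
	}
	ops_head = NULL;
	ops_tail = &ops_head;
}

int main(void)
{
	struct node_id client = { "10.0.0.1" };

	handle_op(true, 0xfd32fc, &client);	/* arrives during the copy */
	handle_op(false, 0xfd32fc, &client);
	play_logged_ops();			/* snapshot copy finished */
	return 0;
}

Appending at the tail preserves arrival order, so after replay the
joining node ends up with the same lock table the rest of the cluster
already holds.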
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
sheep/group.c | 2 +
sheep/ops.c | 21 ++++++++++++++++++++
sheep/sheep_priv.h | 3 ++
sheep/vdi.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 80 insertions(+), 0 deletions(-)
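
Reviewer note: the cluster_lock_vdi_work() hunk below makes a node in
SD_STATUS_COLLECTING_CINFO refuse direct lock requests with
SD_RES_COLLECTING_CINFO, so callers are expected to retry. A
hypothetical caller-side loop might look like the following
(exec_lock_req() and the error code's numeric value are stand-ins for
illustration, not the real sheepdog client API):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define SD_RES_SUCCESS			0x00
#define SD_RES_COLLECTING_CINFO		0x8a	/* illustrative value only */

/* stand-in for the real request helper; here it succeeds on the 3rd try */
static int exec_lock_req(uint32_t vid)
{
	static int calls;

	return ++calls < 3 ? SD_RES_COLLECTING_CINFO : SD_RES_SUCCESS;
}

int main(void)
{
	int ret;

	while ((ret = exec_lock_req(0xfd32fc)) == SD_RES_COLLECTING_CINFO) {
		fprintf(stderr, "node still collecting cinfo, retrying\n");
		sleep(1);
	}
	return ret == SD_RES_SUCCESS ? 0 : 1;
}
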
diff --git a/sheep/group.c b/sheep/group.c
index 2a02d11..52c822b 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -746,6 +746,8 @@ static void cinfo_collection_done(struct work *work)
	free(w);
	collect_work = NULL;

+	play_logged_vdi_ops();
+
	sd_debug("cluster info collection finished");
	sys->cinfo.status = next_status;
}
diff --git a/sheep/ops.c b/sheep/ops.c
index ac219cb..659563c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -239,6 +239,15 @@ static int cluster_lock_vdi_work(struct request *req)
{
	int ret;

+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		/*
+		 * this node is still collecting vdi lock state, so it
+		 * cannot grant locks by itself yet
+		 */
+		sd_err("This node is not ready for vdi locking, try later");
+		return SD_RES_COLLECTING_CINFO;
+	}
+
	if (req->ci->locking_interest_vid) {
		/* 1 fd cannot lock more than 2 VIDs */
		sd_debug("unlocking VID: %"PRIx32, req->ci->interest_vid);
@@ -1269,6 +1278,12 @@ static int cluster_lock_vdi(const struct sd_req *req, struct sd_rsp *rsp,
sd_info("node: %s is locking VDI: %"PRIx32, node_to_str(sender), vid);
+ if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+ sd_debug("logging vdi lock information for later replay");
+ log_vdi_op_lock(vid, &sender->nid);
+ return SD_RES_SUCCESS;
+ }
+
if (!lock_vdi(vid, &sender->nid)) {
sd_err("locking %"PRIx32 "failed", vid);
return SD_RES_VDI_NOT_LOCKED;
@@ -1294,6 +1309,12 @@ static int cluster_release_vdi_main(const struct sd_req *req,
sd_info("node: %s is unlocking VDI: %"PRIx32, node_to_str(sender), vid);
+ if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+ sd_debug("logging vdi unlock information for later replay");
+ log_vdi_op_unlock(vid, &sender->nid);
+ return SD_RES_SUCCESS;
+ }
+
unlock_vdi(vid, &sender->nid);
if (node_is_local(sender)) {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ab6180f..1d4641b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -359,6 +359,9 @@ void notify_release_vdi(uint32_t vid);
void take_vdi_state_snapshot(int epoch);
int get_vdi_state_snapshot(int epoch, void *data);
void free_vdi_state_snapshot(int epoch);
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner);
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner);
+void play_logged_vdi_ops(void);
extern int ec_max_data_strip;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 13f0f5d..ced0ed9 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -378,6 +378,60 @@ out:
	sd_rw_unlock(&vdi_state_lock);
}

+static LIST_HEAD(logged_vdi_ops);
+
+struct vdi_op_log {
+	bool lock;
+	uint32_t vid;
+	struct node_id owner;
+
+	struct list_node list;
+};
+
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = true;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = false;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void play_logged_vdi_ops(void)
+{
+	struct vdi_op_log *op;
+
+	list_for_each_entry(op, &logged_vdi_ops, list) {
+		struct vdi_state entry;
+
+		memset(&entry, 0, sizeof(entry));
+		entry.vid = op->vid;
+		memcpy(&entry.lock_owner, &op->owner,
+		       sizeof(op->owner));
+		if (op->lock)
+			entry.lock_state = LOCK_STATE_LOCKED;
+		else
+			entry.lock_state = LOCK_STATE_UNLOCKED;
+
+		apply_vdi_lock_state(&entry);
+	}
+}
+
static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb,
				    uint32_t new_snapid, uint32_t new_vid,
				    uint32_t *data_vdi_id,
--
1.7.1