[sheepdog] [PATCH 9/9] sheep: log and replay lock/unlock information on a newly joining node
Hitoshi Mitake
mitake.hitoshi at lab.ntt.co.jp
Fri Jun 27 08:13:56 CEST 2014
We need to handle a case like the one below:

1. a new sheep joins the cluster
2. before the new sheep finishes copying the snapshot of the vdi
state, a client issues a lock request
3. the new sheep finishes copying the state

If this execution pattern happens, the lock information produced in
step 2 cannot be obtained by the new sheep. This patch solves the
problem.
Brief description of the solution:

When a sheep joins the cluster, it sets its status to
SD_STATUS_COLLECTING_CINFO. While it is in this state, it logs every
lock/unlock request it receives in an internal queue. After copying
the snapshot (step 3), the sheep replays the log and constructs the
complete lock state.
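
For illustration, here is a minimal standalone sketch of this
log-and-replay pattern. It is a simplification, not the patch itself:
a hand-rolled singly linked list stands in for sheep's list.h helpers,
printf() stands in for apply_vdi_lock_state(), and every name in it is
illustrative only.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for sheep's struct node_id */
struct node_id {
	char addr[16];
};

struct vdi_op_log {
	bool lock;		/* true: lock, false: unlock */
	uint32_t vid;
	struct node_id owner;
	struct vdi_op_log *next;
};

static struct vdi_op_log *ops_head, **ops_tail = &ops_head;
static bool collecting_cinfo = true;	/* SD_STATUS_COLLECTING_CINFO analogue */

/* stand-in for apply_vdi_lock_state(): apply one event to the lock table */
static void apply_op(bool lock, uint32_t vid, const struct node_id *owner)
{
	printf("%s VID %" PRIx32 " by %s\n",
	       lock ? "lock" : "unlock", vid, owner->addr);
}

/* handler: queue the event while collecting cinfo, apply it otherwise */
static void handle_op(bool lock, uint32_t vid, const struct node_id *owner)
{
	if (collecting_cinfo) {
		struct vdi_op_log *op = calloc(1, sizeof(*op));

		op->lock = lock;
		op->vid = vid;
		op->owner = *owner;
		*ops_tail = op;		/* append to keep arrival order */
		ops_tail = &op->next;
		return;
	}
	apply_op(lock, vid, owner);
}

/* once the snapshot copy is done, replay the queued events in order */
static void play_logged_ops(void)
{
	collecting_cinfo = false;
	for (struct vdi_op_log *op = ops_head, *next; op; op = next) {
		next = op->next;
		apply_op(op->lock, op->vid, &op->owner);
		free(op);
	}
	ops_head = NULL;
	ops_tail = &ops_head;
}

int main(void)
{
	struct node_id client = { "10.0.0.1" };

	handle_op(true, 0xfd32fc, &client);	/* arrives during the copy */
	handle_op(false, 0xfd32fc, &client);
	play_logged_ops();			/* snapshot copy finished */
	return 0;
}

Appending at the tail preserves arrival order, so after replay the
joining node ends up with the same lock table the rest of the cluster
already holds.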
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
sheep/group.c | 2 +
sheep/ops.c | 21 ++++++++++++++++++++
sheep/sheep_priv.h | 3 ++
sheep/vdi.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 80 insertions(+), 0 deletions(-)
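
Reviewer note: the cluster_lock_vdi_work() hunk below makes a node in
SD_STATUS_COLLECTING_CINFO refuse direct lock requests with
SD_RES_COLLECTING_CINFO, so callers are expected to retry. A
hypothetical caller-side loop might look like the following
(exec_lock_req() and the error code's numeric value are stand-ins for
illustration, not the real sheepdog client API):

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define SD_RES_SUCCESS			0x00
#define SD_RES_COLLECTING_CINFO		0x8a	/* illustrative value only */

/* stand-in for the real request helper; here it succeeds on the 3rd try */
static int exec_lock_req(uint32_t vid)
{
	static int calls;

	return ++calls < 3 ? SD_RES_COLLECTING_CINFO : SD_RES_SUCCESS;
}

int main(void)
{
	int ret;

	while ((ret = exec_lock_req(0xfd32fc)) == SD_RES_COLLECTING_CINFO) {
		fprintf(stderr, "node still collecting cinfo, retrying\n");
		sleep(1);
	}
	return ret == SD_RES_SUCCESS ? 0 : 1;
}
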
diff --git a/sheep/group.c b/sheep/group.c
index 2a02d11..52c822b 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -746,6 +746,8 @@ static void cinfo_collection_done(struct work *work)
	free(w);
	collect_work = NULL;

+	play_logged_vdi_ops();
+
	sd_debug("cluster info collection finished");
	sys->cinfo.status = next_status;
}
diff --git a/sheep/ops.c b/sheep/ops.c
index ac219cb..659563c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -239,6 +239,15 @@ static int cluster_lock_vdi_work(struct request *req)
{
	int ret;

+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		/*
+		 * this node is still collecting vdi lock state, so it
+		 * cannot grant locks by itself yet
+		 */
+		sd_err("This node is not ready for vdi locking, try later");
+		return SD_RES_COLLECTING_CINFO;
+	}
+
	if (req->ci->locking_interest_vid) {
		/* 1 fd cannot lock more than 2 VIDs */
		sd_debug("unlocking VID: %"PRIx32, req->ci->interest_vid);
@@ -1269,6 +1278,12 @@ static int cluster_lock_vdi(const struct sd_req *req, struct sd_rsp *rsp,
sd_info("node: %s is locking VDI: %"PRIx32, node_to_str(sender), vid);
+ if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+ sd_debug("logging vdi lock information for later replay");
+ log_vdi_op_lock(vid, &sender->nid);
+ return SD_RES_SUCCESS;
+ }
+
if (!lock_vdi(vid, &sender->nid)) {
sd_err("locking %"PRIx32 "failed", vid);
return SD_RES_VDI_NOT_LOCKED;
@@ -1294,6 +1309,12 @@ static int cluster_release_vdi_main(const struct sd_req *req,
sd_info("node: %s is unlocking VDI: %"PRIx32, node_to_str(sender), vid);
+ if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+ sd_debug("logging vdi unlock information for later replay");
+ log_vdi_op_unlock(vid, &sender->nid);
+ return SD_RES_SUCCESS;
+ }
+
unlock_vdi(vid, &sender->nid);
if (node_is_local(sender)) {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ab6180f..1d4641b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -359,6 +359,9 @@ void notify_release_vdi(uint32_t vid);
void take_vdi_state_snapshot(int epoch);
int get_vdi_state_snapshot(int epoch, void *data);
void free_vdi_state_snapshot(int epoch);
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner);
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner);
+void play_logged_vdi_ops(void);
extern int ec_max_data_strip;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 13f0f5d..ced0ed9 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -378,6 +378,60 @@ out:
	sd_rw_unlock(&vdi_state_lock);
}

+static LIST_HEAD(logged_vdi_ops);
+
+struct vdi_op_log {
+	bool lock;
+	uint32_t vid;
+	struct node_id owner;
+
+	struct list_node list;
+};
+
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = true;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = false;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void play_logged_vdi_ops(void)
+{
+	struct vdi_op_log *op;
+
+	list_for_each_entry(op, &logged_vdi_ops, list) {
+		struct vdi_state entry;
+
+		memset(&entry, 0, sizeof(entry));
+		entry.vid = op->vid;
+		memcpy(&entry.lock_owner, &op->owner,
+		       sizeof(op->owner));
+		if (op->lock)
+			entry.lock_state = LOCK_STATE_LOCKED;
+		else
+			entry.lock_state = LOCK_STATE_UNLOCKED;
+
+		apply_vdi_lock_state(&entry);
+	}
+}
+
static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb,
				    uint32_t new_snapid, uint32_t new_vid,
				    uint32_t *data_vdi_id,
--
1.7.1