[sheepdog] [PATCH] sheep: notify vdi copies number before we create the vdi

levin li levin108 at gmail.com
Tue Nov 27 12:19:51 CET 2012


From: levin li <xingke.lwp at taobao.com>

We cannot notify the cluster of the vdi copies number after creating
the vdi: if a recovery happens in the time window after
cluster_vdi_add() finishes and just before post_cluster_new_vdi()
notifies all the nodes in the cluster of the new vdi's copies number,
screen_object_list() may fail to get the copies number for the objects
of the newly created vdi. We cannot make it wait for the copies number
either, because that would cause another race condition.

Notifying the copies number before we create the vdi is a good solution
to this problem; it does no harm even if the creation of the vdi fails.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 include/internal_proto.h |    1 +
 sheep/ops.c              |   22 +++++++++++++++++++---
 sheep/vdi.c              |   26 ++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index b4199ca..e2c1baa 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -66,6 +66,7 @@
 #define SD_OP_COMPLETE_RECOVERY 0xAC
 #define SD_OP_FLUSH_NODES 0xAD
 #define SD_OP_FLUSH_PEER 0xAE
+#define SD_OP_NOTIFY_VDI_ADD  0xAF
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index f99dff6..8862b78 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -137,10 +137,8 @@ static int post_cluster_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 	int ret = rsp->result;
 
 	vprintf(SDOG_INFO, "done %d %ld\n", ret, nr);
-	if (ret == SD_RES_SUCCESS) {
+	if (ret == SD_RES_SUCCESS)
 		set_bit(nr, sys->vdi_inuse);
-		add_vdi_copy_number(nr, rsp->vdi.copies);
-	}
 
 	return ret;
 }
@@ -560,6 +558,17 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
 	return ret;
 }
 
+static int cluster_notify_vdi_add(const struct sd_req *req, struct sd_rsp *rsp,
+				  void *data)
+{
+	uint32_t vid = *(uint32_t *)data;
+	uint32_t nr_copies = *(uint32_t *)((char *)data + sizeof(vid));
+
+	add_vdi_copy_number(vid, nr_copies);
+
+	return SD_RES_SUCCESS;
+}
+
 static int cluster_notify_vdi_del(const struct sd_req *req, struct sd_rsp *rsp,
 				  void *data)
 {
@@ -962,6 +971,13 @@ static struct sd_op_template sd_ops[] = {
 		.process_main = cluster_notify_vdi_del,
 	},
 
+	[SD_OP_NOTIFY_VDI_ADD] = {
+		.name = "NOTIFY_VDI_ADD",
+		.type = SD_OP_TYPE_CLUSTER,
+		.force = true,
+		.process_main = cluster_notify_vdi_add,
+	},
+
 	[SD_OP_COMPLETE_RECOVERY] = {
 		.name = "COMPLETE_RECOVERY",
 		.type = SD_OP_TYPE_CLUSTER,
diff --git a/sheep/vdi.c b/sheep/vdi.c
index f98a1f6..eabe19e 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -438,6 +438,30 @@ int lookup_vdi(const char *name, const char *tag, uint32_t *vid,
 			     create_time);
 }
 
+static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
+{
+	struct sd_req hdr;
+	int ret = SD_RES_SUCCESS;
+	char *buf;
+
+	sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
+	hdr.flags = SD_FLAG_CMD_WRITE;
+	hdr.data_length = sizeof(vdi_id) + sizeof(nr_copies);
+
+	buf = xmalloc(sizeof(vdi_id) + sizeof(nr_copies));
+	memcpy(buf, &vdi_id, sizeof(vdi_id));
+	memcpy(buf + sizeof(vdi_id), &nr_copies, sizeof(nr_copies));
+
+	ret = exec_local_req(&hdr, buf);
+	if (ret != SD_RES_SUCCESS)
+		eprintf("fail to notify vdi add event(%" PRIx32 ", %d)\n",
+			vdi_id, nr_copies);
+
+	free(buf);
+
+	return ret;
+}
+
 int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid)
 {
 	uint32_t cur_vid = 0;
@@ -479,6 +503,8 @@ int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid)
 
 	*new_vid = nr;
 
+	notify_vdi_add(nr, iocb->nr_copies);
+
 	vprintf(SDOG_INFO, "creating new %s %s: size %" PRIu64 ", vid %"
 		PRIx32 ", base %" PRIx32 ", cur %" PRIx32 ", copies %d\n",
 		iocb->create_snapshot ? "snapshot" : "vdi", name, iocb->size,
-- 
1.7.1




More information about the sheepdog mailing list