[sheepdog] [PATCH v2 3/6] sheep: store snapshot vid information in memory

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Wed May 1 10:41:46 CEST 2013


This renames vdi_copy to vdi_state and adds a new field 'snapshot' to
it so that we can know whether the vdi is a snapshot or not.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 include/sheep.h          |  5 ---
 include/sheepdog_proto.h |  5 +++
 sheep/group.c            | 14 ++++----
 sheep/ops.c              | 11 +++---
 sheep/plain_store.c      |  7 ++--
 sheep/sheep_priv.h       | 11 ++++--
 sheep/vdi.c              | 91 ++++++++++++++++++++++++------------------------
 7 files changed, 77 insertions(+), 67 deletions(-)

diff --git a/include/sheep.h b/include/sheep.h
index 26b9639..1c3d847 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -36,11 +36,6 @@ struct vnode_info {
 	int refcnt;
 };
 
-struct vdi_copy {
-	uint32_t vid;
-	uint32_t nr_copies;
-};
-
 #define TRACE_GRAPH_ENTRY  0x01
 #define TRACE_GRAPH_RETURN 0x02
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 886b535..aef97dd 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -138,6 +138,11 @@ struct sd_req {
 			uint32_t	copies;
 			uint32_t	tag;
 		} cluster;
+		struct {
+			uint32_t	old_vid;
+			uint32_t	new_vid;
+			uint32_t	copies;
+		} vdi_state;
 
 		uint32_t		__pad[8];
 	};
diff --git a/sheep/group.c b/sheep/group.c
index 4044d12..bb5d703 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -647,7 +647,7 @@ static int get_vdis_from(struct sd_node *node)
 {
 	struct sd_req hdr;
 	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
-	struct vdi_copy *vc = NULL;
+	struct vdi_state *vs = NULL;
 	int i, ret = SD_RES_SUCCESS;
 	unsigned int rlen;
 	int count;
@@ -656,21 +656,21 @@ static int get_vdis_from(struct sd_node *node)
 		goto out;
 
 	rlen = SD_DATA_OBJ_SIZE; /* FIXME */
-	vc = xzalloc(rlen);
+	vs = xzalloc(rlen);
 	sd_init_req(&hdr, SD_OP_GET_VDI_COPIES);
 	hdr.data_length = rlen;
 	hdr.epoch = sys_epoch();
-	ret = sheep_exec_req(&node->nid, &hdr, (char *)vc);
+	ret = sheep_exec_req(&node->nid, &hdr, (char *)vs);
 	if (ret != SD_RES_SUCCESS)
 		goto out;
 
-	count = rsp->data_length / sizeof(*vc);
+	count = rsp->data_length / sizeof(*vs);
 	for (i = 0; i < count; i++) {
-		set_bit(vc[i].vid, sys->vdi_inuse);
-		add_vdi_copy_number(vc[i].vid, vc[i].nr_copies);
+		set_bit(vs[i].vid, sys->vdi_inuse);
+		add_vdi_state(vs[i].vid, vs[i].nr_copies, vs[i].snapshot);
 	}
 out:
-	free(vc);
+	free(vs);
 	return ret;
 }
 
diff --git a/sheep/ops.c b/sheep/ops.c
index 48dd08c..7f27bc3 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -390,7 +390,7 @@ static int local_read_vdis(const struct sd_req *req, struct sd_rsp *rsp,
 static int local_get_vdi_copies(const struct sd_req *req, struct sd_rsp *rsp,
 			   void *data)
 {
-	rsp->data_length = fill_vdi_copy_list(data);
+	rsp->data_length = fill_vdi_state_list(data);
 
 	return SD_RES_SUCCESS;
 }
@@ -618,10 +618,13 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
 static int cluster_notify_vdi_add(const struct sd_req *req, struct sd_rsp *rsp,
 				  void *data)
 {
-	uint32_t vid = *(uint32_t *)data;
-	uint32_t nr_copies = *(uint32_t *)((char *)data + sizeof(vid));
+	if (req->vdi_state.old_vid)
+		/* make the previous working vdi a snapshot */
+		add_vdi_state(req->vdi_state.old_vid,
+			      get_vdi_copy_number(req->vdi_state.old_vid),
+			      true);
 
-	add_vdi_copy_number(vid, nr_copies);
+	add_vdi_state(req->vdi_state.new_vid, req->vdi_state.copies, false);
 
 	return SD_RES_SUCCESS;
 }
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index b539df1..640f769 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -168,7 +168,7 @@ int default_cleanup(void)
 	return SD_RES_SUCCESS;
 }
 
-static int init_vdi_copy_number(uint64_t oid, char *wd)
+static int init_vdi_state(uint64_t oid, char *wd)
 {
 	char path[PATH_MAX];
 	int fd, flags = get_open_flags(oid, false, 0), ret;
@@ -190,7 +190,8 @@ static int init_vdi_copy_number(uint64_t oid, char *wd)
 		goto out;
 	}
 
-	add_vdi_copy_number(oid_to_vid(oid), inode->nr_copies);
+	add_vdi_state(oid_to_vid(oid), inode->nr_copies,
+		      vdi_is_snapshot(inode));
 
 	ret = SD_RES_SUCCESS;
 out:
@@ -206,7 +207,7 @@ static int init_objlist_and_vdi_bitmap(uint64_t oid, char *wd, void *arg)
 	if (is_vdi_obj(oid)) {
 		sd_dprintf("found the VDI object %" PRIx64, oid);
 		set_bit(oid_to_vid(oid), sys->vdi_inuse);
-		ret = init_vdi_copy_number(oid, wd);
+		ret = init_vdi_state(oid, wd);
 		if (ret != SD_RES_SUCCESS)
 			return ret;
 	}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 1fc291c..99b44a2 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -149,6 +149,13 @@ struct vdi_info {
 	uint64_t create_time;
 };
 
+struct vdi_state {
+	uint32_t vid;
+	uint8_t nr_copies;
+	uint8_t snapshot;
+	uint16_t _pad;
+};
+
 struct store_driver {
 	struct list_head list;
 	const char *name;
@@ -252,12 +259,12 @@ int init_base_path(const char *dir);
 int init_disk_space(const char *d);
 int lock_base_dir(const char *d);
 
-int fill_vdi_copy_list(void *data);
+int fill_vdi_state_list(void *data);
 int get_vdi_copy_number(uint32_t vid);
 int get_obj_copy_number(uint64_t oid, int nr_zones);
 int get_max_copy_number(void);
 int get_req_copy_number(struct request *req);
-int add_vdi_copy_number(uint32_t vid, int nr_copies);
+int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot);
 int vdi_exist(uint32_t vid);
 int vdi_create(struct vdi_iocb *iocb, uint32_t *new_vid);
 int vdi_delete(struct vdi_iocb *iocb, struct request *req);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 057dd5a..3085148 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -16,24 +16,25 @@
 #include "sheepdog_proto.h"
 #include "sheep_priv.h"
 
-struct vdi_copy_entry {
+struct vdi_state_entry {
 	uint32_t vid;
 	unsigned int nr_copies;
+	bool snapshot;
 	struct rb_node node;
 };
 
 static uint32_t max_copies;
-static struct rb_root vdi_copy_root = RB_ROOT;
-static pthread_rwlock_t vdi_copy_lock = PTHREAD_RWLOCK_INITIALIZER;
+static struct rb_root vdi_state_root = RB_ROOT;
+static pthread_rwlock_t vdi_state_lock = PTHREAD_RWLOCK_INITIALIZER;
 
-static struct vdi_copy_entry *vdi_copy_search(struct rb_root *root,
-					      uint32_t vid)
+static struct vdi_state_entry *vdi_state_search(struct rb_root *root,
+						uint32_t vid)
 {
 	struct rb_node *n = root->rb_node;
-	struct vdi_copy_entry *t;
+	struct vdi_state_entry *t;
 
 	while (n) {
-		t = rb_entry(n, struct vdi_copy_entry, node);
+		t = rb_entry(n, struct vdi_state_entry, node);
 
 		if (vid < t->vid)
 			n = n->rb_left;
@@ -46,16 +47,16 @@ static struct vdi_copy_entry *vdi_copy_search(struct rb_root *root,
 	return NULL;
 }
 
-static struct vdi_copy_entry *vdi_copy_insert(struct rb_root *root,
-					      struct vdi_copy_entry *new)
+static struct vdi_state_entry *vdi_state_insert(struct rb_root *root,
+						struct vdi_state_entry *new)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
-	struct vdi_copy_entry *entry;
+	struct vdi_state_entry *entry;
 
 	while (*p) {
 		parent = *p;
-		entry = rb_entry(parent, struct vdi_copy_entry, node);
+		entry = rb_entry(parent, struct vdi_state_entry, node);
 
 		if (new->vid < entry->vid)
 			p = &(*p)->rb_left;
@@ -72,11 +73,11 @@ static struct vdi_copy_entry *vdi_copy_insert(struct rb_root *root,
 
 int get_vdi_copy_number(uint32_t vid)
 {
-	struct vdi_copy_entry *entry;
+	struct vdi_state_entry *entry;
 
-	pthread_rwlock_rdlock(&vdi_copy_lock);
-	entry = vdi_copy_search(&vdi_copy_root, vid);
-	pthread_rwlock_unlock(&vdi_copy_lock);
+	pthread_rwlock_rdlock(&vdi_state_lock);
+	entry = vdi_state_search(&vdi_state_root, vid);
+	pthread_rwlock_unlock(&vdi_state_lock);
 
 	if (!entry) {
 		sd_eprintf("No VDI copy entry for %" PRIx32 " found", vid);
@@ -113,50 +114,54 @@ int get_max_copy_number(void)
 	return nr_copies;
 }
 
-int add_vdi_copy_number(uint32_t vid, int nr_copies)
+int add_vdi_state(uint32_t vid, int nr_copies, bool snapshot)
 {
-	struct vdi_copy_entry *entry, *old;
+	struct vdi_state_entry *entry, *old;
 
 	entry = xzalloc(sizeof(*entry));
 	entry->vid = vid;
 	entry->nr_copies = nr_copies;
+	entry->snapshot = snapshot;
 
 	sd_dprintf("%" PRIx32 ", %d", vid, nr_copies);
 
-	pthread_rwlock_wrlock(&vdi_copy_lock);
-	old = vdi_copy_insert(&vdi_copy_root, entry);
+	pthread_rwlock_wrlock(&vdi_state_lock);
+	old = vdi_state_insert(&vdi_state_root, entry);
 	if (old) {
 		free(entry);
 		entry = old;
 		entry->nr_copies = nr_copies;
+		entry->snapshot = snapshot;
 	}
 
 	if (uatomic_read(&max_copies) == 0 ||
 	    nr_copies > uatomic_read(&max_copies))
 		uatomic_set(&max_copies, nr_copies);
-	pthread_rwlock_unlock(&vdi_copy_lock);
+	pthread_rwlock_unlock(&vdi_state_lock);
 
 	return SD_RES_SUCCESS;
 }
 
-int fill_vdi_copy_list(void *data)
+int fill_vdi_state_list(void *data)
 {
 	int nr = 0;
 	struct rb_node *n;
-	struct vdi_copy *vc = data;
-	struct vdi_copy_entry *entry;
-
-	pthread_rwlock_rdlock(&vdi_copy_lock);
-	for (n = rb_first(&vdi_copy_root); n; n = rb_next(n)) {
-		entry = rb_entry(n, struct vdi_copy_entry, node);
-		vc->vid = entry->vid;
-		vc->nr_copies = entry->nr_copies;
-		vc++;
+	struct vdi_state *vs = data;
+	struct vdi_state_entry *entry;
+
+	pthread_rwlock_rdlock(&vdi_state_lock);
+	for (n = rb_first(&vdi_state_root); n; n = rb_next(n)) {
+		entry = rb_entry(n, struct vdi_state_entry, node);
+		memset(vs, 0, sizeof(*vs));
+		vs->vid = entry->vid;
+		vs->nr_copies = entry->nr_copies;
+		vs->snapshot = entry->snapshot;
+		vs++;
 		nr++;
 	}
-	pthread_rwlock_unlock(&vdi_copy_lock);
+	pthread_rwlock_unlock(&vdi_state_lock);
 
-	return nr * sizeof(*vc);
+	return nr * sizeof(*vs);
 }
 
 static inline bool vdi_is_deleted(struct sd_inode *inode)
@@ -443,26 +448,20 @@ int vdi_lookup(struct vdi_iocb *iocb, struct vdi_info *info)
 	return fill_vdi_info(left, right, iocb, info);
 }
 
-static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies)
+static int notify_vdi_add(uint32_t vdi_id, uint32_t nr_copies, uint32_t old_vid)
 {
 	int ret = SD_RES_SUCCESS;
 	struct sd_req hdr;
-	char *buf;
 
 	sd_init_req(&hdr, SD_OP_NOTIFY_VDI_ADD);
-	hdr.flags = SD_FLAG_CMD_WRITE;
-	hdr.data_length = sizeof(vdi_id) + sizeof(nr_copies);
-
-	buf = xmalloc(sizeof(vdi_id) + sizeof(nr_copies));
-	memcpy(buf, &vdi_id, sizeof(vdi_id));
-	memcpy(buf + sizeof(vdi_id), &nr_copies, sizeof(nr_copies));
+	hdr.vdi_state.old_vid = old_vid;
+	hdr.vdi_state.new_vid = vdi_id;
+	hdr.vdi_state.copies = nr_copies;
 
-	ret = exec_local_req(&hdr, buf);
+	ret = exec_local_req(&hdr, NULL);
 	if (ret != SD_RES_SUCCESS)
-		sd_eprintf("fail to notify vdi add event(%" PRIx32 ", %d)",
-			   vdi_id, nr_copies);
-
-	free(buf);
+		sd_eprintf("fail to notify vdi add event(%" PRIx32 ", %d, %"
+			   PRIx32 ")", vdi_id, nr_copies, old_vid);
 
 	return ret;
 }
@@ -499,7 +498,7 @@ int vdi_create(struct vdi_iocb *iocb, uint32_t *new_vid)
 	if (!iocb->snapid)
 		iocb->snapid = 1;
 	*new_vid = info.free_bit;
-	notify_vdi_add(*new_vid, iocb->nr_copies);
+	notify_vdi_add(*new_vid, iocb->nr_copies, info.vid);
 
 	sd_dprintf("%s %s: size %" PRIu64 ", vid %" PRIx32 ", base %" PRIx32
 		   ", cur %" PRIx32 ", copies %d, snapid %"PRIu32,
-- 
1.8.1.3.566.gaa39828




More information about the sheepdog mailing list