[sheepdog] [PATCH 3/6] sheep: add vdi_copy tree to store copies number for every VDI

levin li levin108 at gmail.com
Mon Aug 6 07:59:56 CEST 2012


From: levin li <xingke.lwp at taobao.com>

Normal requests from QEMU carry the number of copies in their header,
because QEMU stores nr_copies the first time it reads the inode data.
However, many local requests, such as read_copy_from_replica and
recover_object_from_replica, do not know the number of copies for each
object, so this tree is needed to keep the number of copies for every VDI.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/Makefile.am         |    2 +-
 sheep/object_list_cache.c |    1 +
 sheep/ops.c               |    4 +-
 sheep/sheep_priv.h        |    5 ++
 sheep/vdi_copies.c        |  115 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 125 insertions(+), 2 deletions(-)
 create mode 100644 sheep/vdi_copies.c

diff --git a/sheep/Makefile.am b/sheep/Makefile.am
index 16c79f0..3d4eea1 100644
--- a/sheep/Makefile.am
+++ b/sheep/Makefile.am
@@ -25,7 +25,7 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include \
 sbin_PROGRAMS		= sheep
 
 sheep_SOURCES		= sheep.c group.c request.c gateway.c store.c vdi.c work.c \
-			  journal.c ops.c recovery.c cluster/local.c \
+			  journal.c ops.c recovery.c cluster/local.c vdi_copies.c \
 			  object_cache.c object_list_cache.c sockfd_cache.c
 
 if BUILD_COROSYNC
diff --git a/sheep/object_list_cache.c b/sheep/object_list_cache.c
index df94dce..4ca30cd 100644
--- a/sheep/object_list_cache.c
+++ b/sheep/object_list_cache.c
@@ -210,6 +210,7 @@ static void objlist_deletion_done(struct work *work)
 	struct objlist_deletion_work *ow =
 		container_of(work, struct objlist_deletion_work, work);
 	free(ow);
+
 }
 
 /*
diff --git a/sheep/ops.c b/sheep/ops.c
index 062a7b4..efaf979 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -141,8 +141,10 @@ static int post_cluster_new_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 	int ret = rsp->result;
 
 	vprintf(SDOG_INFO, "done %d %ld\n", ret, nr);
-	if (ret == SD_RES_SUCCESS)
+	if (ret == SD_RES_SUCCESS) {
 		set_bit(nr, sys->vdi_inuse);
+		add_vdi_copies(nr, rsp->vdi.copies);
+	}
 
 	return ret;
 }
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index afd8c33..b6830ae 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -200,6 +200,11 @@ int create_listen_port(int port, void *data);
 int init_store(const char *dir, int enable_write_cache);
 int init_base_path(const char *dir);
 
+int get_vdi_copies(uint32_t vid);
+int get_obj_copies(uint64_t oid);
+int get_min_copies(void);
+int get_req_copies(struct request *req);
+void add_vdi_copies(uint32_t vid, int nr_copies);
 int vdi_exist(uint32_t vid);
 int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid);
 
diff --git a/sheep/vdi_copies.c b/sheep/vdi_copies.c
new file mode 100644
index 0000000..d54308c
--- /dev/null
+++ b/sheep/vdi_copies.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#include "sheepdog_proto.h"
+#include "sheep_priv.h"
+
+struct vdi_copy_entry {
+	uint32_t vid;
+	unsigned int nr_copies;
+	struct rb_node node;
+};
+
+uint32_t min_copies = SD_DEFAULT_COPIES;
+static struct rb_root vdi_copy_root = RB_ROOT;
+pthread_rwlock_t vdi_copy_lock = PTHREAD_RWLOCK_INITIALIZER;
+
+static struct vdi_copy_entry *vdi_copy_search(struct rb_root *root,
+					      uint32_t vid)
+{
+	struct rb_node *n = root->rb_node;
+	struct vdi_copy_entry *t;
+
+	while (n) {
+		t = rb_entry(n, struct vdi_copy_entry, node);
+
+		if (vid < t->vid)
+			n = n->rb_left;
+		else if (vid > t->vid)
+			n = n->rb_right;
+		else
+			return t;
+	}
+
+	return NULL;
+}
+
+static struct vdi_copy_entry *vdi_copy_insert(struct rb_root *root,
+					      struct vdi_copy_entry *new)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct vdi_copy_entry *entry;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct vdi_copy_entry, node);
+
+		if (new->vid < entry->vid)
+			p = &(*p)->rb_left;
+		else if (new->vid > entry->vid)
+			p = &(*p)->rb_right;
+		else
+			return entry; /* already has this entry */
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, root);
+
+	return NULL; /* insert successfully */
+}
+
+int get_vdi_copies(uint32_t vid)
+{
+	struct vdi_copy_entry *entry;
+
+	pthread_rwlock_rdlock(&vdi_copy_lock);
+	entry = vdi_copy_search(&vdi_copy_root, vid);
+	pthread_rwlock_unlock(&vdi_copy_lock);
+
+	if (!entry)
+		panic("No VDI copy entry for %" PRIx32 " found\n", vid);
+
+	return entry->nr_copies;
+}
+
+int get_obj_copies(uint64_t oid)
+{
+	uint32_t vid = oid_to_vid(oid);
+
+	return get_vdi_copies(vid);
+}
+
+int get_req_copies(struct request *req)
+{
+	int nr_copies;
+
+	nr_copies = req->rq.obj.copies;
+	if (!nr_copies)
+		nr_copies = get_obj_copies(req->rq.obj.oid);
+
+	return nr_copies;
+}
+
+int get_min_copies(void)
+{
+	return uatomic_read(&min_copies);
+}
+
+void add_vdi_copies(uint32_t vid, int nr_copies)
+{
+	struct vdi_copy_entry *entry;
+
+	entry = xzalloc(sizeof(*entry));
+	entry->vid = vid;
+	entry->nr_copies = nr_copies;
+
+	pthread_rwlock_wrlock(&vdi_copy_lock);
+	if (vdi_copy_insert(&vdi_copy_root, entry))
+		panic("VDI copy entry for %" PRIx32 " already exists\n", vid);
+	if (uatomic_read(&min_copies) == 0 ||
+	    nr_copies < uatomic_read(&min_copies))
+		uatomic_set(&min_copies, nr_copies);
+	pthread_rwlock_unlock(&vdi_copy_lock);
+}
-- 
1.7.1




More information about the sheepdog mailing list