[sheepdog] [PATCH v5 08/16] collie/farm: implement object_tree

Kai Zhang kyle at zelin.io
Mon May 20 09:50:38 CEST 2013


object_tree is a red black tree which is used to de-duplicate oid.
So that objects with the same oid in the cluster will be read only once when doing cluster snapshot.

When user sends cluster snapshot command, collie will:
1. parse vdis
2. save readonly object's id into object_tree, duplicate id will be saved only once
3. for each oid in object_tree, read the object and save to farm

object_tree has 5 public interfaces:
1. object_tree_size()
   returns current object number in the tree
2. object_tree_insert()
   insert a object id into the tree
3. object_tree_free()
   free memory
4. object_tree_free()
   print each node of tree for debug
5. for_each_object_in_tree()
   foreach node in the tree, read object from cluster and call the function.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/Makefile.am        |    3 +-
 collie/farm/farm.h        |   32 +++++++++++
 collie/farm/object_tree.c |  133 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+), 1 deletions(-)
 create mode 100644 collie/farm/farm.h
 create mode 100644 collie/farm/object_tree.c

diff --git a/collie/Makefile.am b/collie/Makefile.am
index 3fba2dd..3fdb147 100644
--- a/collie/Makefile.am
+++ b/collie/Makefile.am
@@ -23,7 +23,8 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include
 
 sbin_PROGRAMS		= collie
 
-collie_SOURCES		= collie.c common.c treeview.c vdi.c node.c cluster.c
+collie_SOURCES		= farm/object_tree.c \
+			  collie.c common.c treeview.c vdi.c node.c cluster.c
 
 if BUILD_TRACE
 collie_SOURCES          += debug.c
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
new file mode 100644
index 0000000..d1cfee1
--- /dev/null
+++ b/collie/farm/farm.h
@@ -0,0 +1,32 @@
+#ifndef FARM_H
+#define FARM_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <memory.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <linux/limits.h>
+
+#include "collie.h"
+#include "sheepdog_proto.h"
+#include "sheep.h"
+#include "logger.h"
+#include "strbuf.h"
+#include "sha1.h"
+
+/* object_tree.c */
+int object_tree_size(void);
+void object_tree_insert(uint64_t oid, int nr_copies);
+void object_tree_free(void);
+void object_tree_print(void);
+typedef int (*object_handler_func_t)(uint64_t oid, int nr_copies, void *buf,
+				     size_t size, void *data);
+int for_each_object_in_tree(object_handler_func_t func, void *data);
+
+#endif
diff --git a/collie/farm/object_tree.c b/collie/farm/object_tree.c
new file mode 100644
index 0000000..b424f12
--- /dev/null
+++ b/collie/farm/object_tree.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2013 Zelin.io
+ *
+ * Kai Zhang <kyle at zelin.io>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "farm.h"
+#include "rbtree.h"
+
+struct obj_rb_entry {
+	uint64_t oid;
+	int nr_copies;
+	struct rb_node node;
+	struct list_head list;
+};
+
+struct obj_rb_tree {
+	int nr_objs;
+	struct rb_root root;
+	struct list_head list;
+};
+
+static struct obj_rb_tree obj_tree = {
+	.nr_objs = 0,
+	.root = RB_ROOT,
+	.list = LIST_HEAD_INIT(obj_tree.list)
+};
+
+static struct obj_rb_entry *do_insert(struct rb_root *root,
+				      struct obj_rb_entry *new)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct obj_rb_entry *entry;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct obj_rb_entry, node);
+
+		if (new->oid < entry->oid)
+			p = &(*p)->rb_left;
+		else if (new->oid > entry->oid)
+			p = &(*p)->rb_right;
+		else
+			return entry; /* already has this entry */
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, root);
+
+	return NULL; /* insert sucessfully */
+}
+
+static struct obj_rb_entry *cached_entry;
+void object_tree_insert(uint64_t oid, int nr_copies)
+{
+	struct rb_root *root = &obj_tree.root;
+	struct obj_rb_entry *p = NULL;
+
+	if (!cached_entry)
+		cached_entry = xzalloc(sizeof(*cached_entry));
+	cached_entry->oid = oid;
+	cached_entry->nr_copies = nr_copies;
+	rb_init_node(&cached_entry->node);
+	p = do_insert(root, cached_entry);
+	if (!p) {
+		list_add(&cached_entry->list, &obj_tree.list);
+		obj_tree.nr_objs++;
+		cached_entry = NULL;
+	}
+}
+
+void object_tree_print(void)
+{
+	struct rb_node *p = rb_first(&obj_tree.root);
+	struct obj_rb_entry *entry;
+	printf("nr_objs: %d\n", obj_tree.nr_objs);
+
+	while (p) {
+		entry = rb_entry(p, struct obj_rb_entry, node);
+		printf("Obj id: %"PRIu64"\n", entry->oid);
+		p = rb_next(p);
+	}
+}
+
+void object_tree_free(void)
+{
+	struct obj_rb_entry *entry, *next;
+	list_for_each_entry_safe(entry, next, &obj_tree.list, list)
+		free(entry);
+
+	free(cached_entry);
+}
+
+int object_tree_size()
+{
+	return obj_tree.nr_objs;
+}
+
+int for_each_object_in_tree(object_handler_func_t func, void *data)
+{
+	struct rb_node *p = rb_first(&obj_tree.root);
+	struct obj_rb_entry *entry;
+	uint64_t oid;
+	size_t size;
+	void *buf = xmalloc(max(SD_INODE_SIZE, SD_DATA_OBJ_SIZE));
+	int ret = -1;
+
+	while (p) {
+		entry = rb_entry(p, struct obj_rb_entry, node);
+		oid = entry->oid;
+		size = get_objsize(oid);
+
+		if (sd_read_object(oid, buf, size, 0, true) < 0)
+			goto out;
+
+		if (func(oid, entry->nr_copies,
+			 buf, size, data) < 0)
+			goto out;
+
+		p = rb_next(p);
+	}
+	ret = 0;
+out:
+	free(buf);
+	return ret;
+}
-- 
1.7.1



More information about the sheepdog mailing list