[sheepdog] [PATCH v2 05/12] collie/farm: implement object_rb_tree

Kai Zhang kyle at zelin.io
Tue May 14 09:16:43 CEST 2013


object_rb_tree is a red black tree which is used to de-duplicate oid.
So that objects with the same oid will be read only once when doing cluster snapshot.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/Makefile.am           |    3 +-
 collie/farm/farm.h           |   38 +++++++++++
 collie/farm/object_rb_tree.c |  148 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 188 insertions(+), 1 deletions(-)
 create mode 100644 collie/farm/farm.h
 create mode 100644 collie/farm/object_rb_tree.c

diff --git a/collie/Makefile.am b/collie/Makefile.am
index 386aa49..9c30495 100644
--- a/collie/Makefile.am
+++ b/collie/Makefile.am
@@ -23,7 +23,8 @@ INCLUDES		= -I$(top_builddir)/include -I$(top_srcdir)/include
 
 sbin_PROGRAMS		= collie
 
-collie_SOURCES		= collie.c common.c treeview.c vdi.c node.c cluster.c
+collie_SOURCES		= farm/object_rb_tree.c \
+			  collie.c common.c treeview.c vdi.c node.c cluster.c
 
 if BUILD_TRACE
 collie_SOURCES          += debug.c
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
new file mode 100644
index 0000000..3d08396
--- /dev/null
+++ b/collie/farm/farm.h
@@ -0,0 +1,38 @@
+#ifndef FARM_H
+#define FARM_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <memory.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <linux/limits.h>
+
+#include "collie.h"
+#include "sheepdog_proto.h"
+#include "sheep.h"
+#include "logger.h"
+#include "strbuf.h"
+#include "sha1.h"
+
+enum obj_type {
+	OBJECT = 1,
+	VDI,
+};
+
+/* object_rb_tree.c */
+int get_obj_nr(void);
+void obj_tree_insert(uint64_t oid, enum obj_type type, int nr_copies);
+void free_obj_tree(void);
+void print_obj_tree(void);
+typedef int (*obj_parser_func_t)(uint64_t oid, enum obj_type type,
+				 int nr_copies, void *buf,
+				 size_t size, void *data);
+int parse_obj(obj_parser_func_t func, void *data);
+
+#endif
diff --git a/collie/farm/object_rb_tree.c b/collie/farm/object_rb_tree.c
new file mode 100644
index 0000000..3ac7089
--- /dev/null
+++ b/collie/farm/object_rb_tree.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2013 Zelin Inc.
+ *
+ * Kai Zhang <kyle at zelin.io>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "farm.h"
+#include "rbtree.h"
+
+struct obj_rb_entry {
+	uint64_t oid;
+	enum obj_type type;
+	int nr_copies;
+	struct rb_node node;
+	struct list_head list;
+};
+
+struct obj_rb_tree {
+	int nr_objs;
+	struct rb_root root;
+	struct list_head list;
+};
+
+struct obj_rb_tree obj_tree = {
+	.nr_objs = 0,
+	.root = RB_ROOT,
+	.list = LIST_HEAD_INIT(obj_tree.list)
+};
+
+static struct obj_rb_entry *do_insert(struct rb_root *root,
+				      struct obj_rb_entry *new)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct obj_rb_entry *entry;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct obj_rb_entry, node);
+
+		if (new->oid < entry->oid)
+			p = &(*p)->rb_left;
+		else if (new->oid > entry->oid)
+			p = &(*p)->rb_right;
+		else
+			return entry; /* already has this entry */
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, root);
+
+	return NULL; /* insert sucessfully */
+}
+
+static struct obj_rb_entry *cached_entry;
+void obj_tree_insert(uint64_t oid, enum obj_type type, int nr_copies)
+{
+	struct rb_root *root = &obj_tree.root;
+	struct obj_rb_entry *p = NULL;
+
+	if (!cached_entry)
+		cached_entry = xzalloc(sizeof(*cached_entry));
+	cached_entry->oid = oid;
+	cached_entry->type = type;
+	cached_entry->nr_copies = nr_copies;
+	rb_init_node(&cached_entry->node);
+	p = do_insert(root, cached_entry);
+	if (!p) {
+		list_add(&cached_entry->list, &obj_tree.list);
+		obj_tree.nr_objs++;
+		cached_entry = NULL;
+	}
+}
+
+void print_obj_tree(void)
+{
+	struct rb_node *p = rb_first(&obj_tree.root);
+	struct obj_rb_entry *entry;
+	printf("nr_objs: %d\n", obj_tree.nr_objs);
+
+	while (p) {
+		entry = rb_entry(p, struct obj_rb_entry, node);
+		printf("Obj id: %"PRIu64"\n", entry->oid);
+		p = rb_next(p);
+	}
+}
+
+void free_obj_tree(void)
+{
+	struct obj_rb_entry *entry, *next;
+	list_for_each_entry_safe(entry, next, &obj_tree.list, list) {
+		free(entry);
+	}
+	if (cached_entry)
+		free(cached_entry);
+}
+
+int get_obj_nr()
+{
+	return obj_tree.nr_objs;
+}
+
+int parse_obj(obj_parser_func_t func, void *data)
+{
+	struct rb_node *p = rb_first(&obj_tree.root);
+	struct obj_rb_entry *entry;
+	uint64_t oid;
+	void *obj_buf = xmalloc(SD_DATA_OBJ_SIZE);
+	void *vdi_buf = xmalloc(SD_INODE_SIZE);
+	void *buf;
+	size_t size;
+	int ret = -1;
+
+	while (p) {
+		entry = rb_entry(p, struct obj_rb_entry, node);
+		oid = entry->oid;
+		if (entry->type == OBJECT) {
+			buf = obj_buf;
+			size = SD_DATA_OBJ_SIZE;
+		} else if (entry->type == VDI) {
+			buf = vdi_buf;
+			size = SD_INODE_SIZE;
+		} else {
+			sd_eprintf("unknow object type %d", entry->type);
+			goto out;
+		}
+
+		if (sd_read_object(oid, buf, size, 0, true) < 0)
+			goto out;
+
+		if (func(oid, entry->type, entry->nr_copies,
+				buf, size, data) < 0)
+			goto out;
+
+		p = rb_next(p);
+	}
+	ret = 0;
+out:
+	free(obj_buf);
+	free(vdi_buf);
+	return ret;
+}
-- 
1.7.1




More information about the sheepdog mailing list