[sheepdog] [PATCH v2 2/3] dog: add "block_size_shift" option to cluster format command

Teruaki Ishizaki ishizaki.teruaki at lab.ntt.co.jp
Fri Dec 12 13:48:31 CET 2014


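This patch adds a "block_size_shift" option (-z) to the "dog cluster format"
command so that the cluster's default block_size_shift (the shift count that
determines the default data object size) can be specified. The accepted range
is 20 to 31; if the option is not given, sheep falls back to
SD_DEFAULT_BLOCK_SIZE_SHIFT (22).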

Signed-off-by: Teruaki Ishizaki <ishizaki.teruaki at lab.ntt.co.jp>
---
 dog/cluster.c            |   37 +++++++++++++++++++++++++++++++------
 dog/farm/farm.c          |    5 ++++-
 dog/farm/farm.h          |   12 ++++++++----
 dog/farm/object_tree.c   |   10 +++++++---
 include/sheepdog_proto.h |    1 +
 sheep/ops.c              |    3 +++
 6 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/dog/cluster.c b/dog/cluster.c
index e42bc54..20f190b 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -25,6 +25,8 @@ static struct sd_option cluster_options[] = {
 	 "use multi-thread for 'cluster snapshot save'"},
 	{'t', "strict", false,
 	 "do not serve write request if number of nodes is not sufficient"},
+	{'z', "block_size_shift", true, "specify the shift num of default"
+	      " data object size"},
 	{ 0, NULL, false, NULL },
 };
 
@@ -32,6 +34,7 @@ static struct cluster_cmd_data {
 	uint8_t copies;
 	uint8_t copy_policy;
 	uint8_t multithread;
+	uint8_t block_size_shift;
 	bool force;
 	bool strict;
 	char name[STORE_LEN];
@@ -113,6 +116,7 @@ static int cluster_format(int argc, char **argv)
 	sd_init_req(&hdr, SD_OP_MAKE_FS);
 	hdr.cluster.copies = cluster_cmd_data.copies;
 	hdr.cluster.copy_policy = cluster_cmd_data.copy_policy;
+	hdr.cluster.block_size_shift = cluster_cmd_data.block_size_shift;
 	hdr.cluster.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000;
 
 	if (strlen(cluster_cmd_data.name))
@@ -335,7 +339,7 @@ static void fill_cb(struct sd_index *idx, void *arg, int ignore)
 	if (idx->vdi_id) {
 		oid = vid_to_data_oid(idx->vdi_id, idx->idx);
 		object_tree_insert(oid, inode->nr_copies,
-				   inode->copy_policy);
+				   inode->copy_policy, inode->block_size_shift);
 	}
 }
 
@@ -346,6 +350,7 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
 	uint64_t vdi_oid = vid_to_vdi_oid(vid), vmstate_oid;
 	uint32_t vdi_id;
 	uint32_t nr_objs, nr_vmstate_object;
+	uint32_t object_size = (UINT32_C(1) << i->block_size_shift);
 	struct vdi_option *opt = (struct vdi_option *)data;
 	bool matched;
 
@@ -369,7 +374,8 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
 		opt->nr_snapshot++;
 
 	/* fill vdi object id */
-	object_tree_insert(vdi_oid, i->nr_copies, i->copy_policy);
+	object_tree_insert(vdi_oid, i->nr_copies, i->copy_policy,
+			   i->block_size_shift);
 
 	/* fill data object id */
 	if (i->store_policy == 0) {
@@ -379,16 +385,18 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
 			if (!vdi_id)
 				continue;
 			uint64_t oid = vid_to_data_oid(vdi_id, idx);
-			object_tree_insert(oid, i->nr_copies, i->copy_policy);
+			object_tree_insert(oid, i->nr_copies, i->copy_policy,
+					   i->block_size_shift);
 		}
 	} else
 		sd_inode_index_walk(i, fill_cb, &i);
 
 	/* fill vmstate object id */
-	nr_vmstate_object = DIV_ROUND_UP(i->vm_state_size, SD_DATA_OBJ_SIZE);
+	nr_vmstate_object = DIV_ROUND_UP(i->vm_state_size, object_size);
 	for (uint32_t idx = 0; idx < nr_vmstate_object; idx++) {
 		vmstate_oid = vid_to_vmstate_oid(vid, idx);
-		object_tree_insert(vmstate_oid, i->nr_copies, i->copy_policy);
+		object_tree_insert(vmstate_oid, i->nr_copies,
+				   i->copy_policy, i->block_size_shift);
 	}
 }
 
@@ -483,6 +491,7 @@ static int load_snapshot(int argc, char **argv)
 
 	cluster_cmd_data.copies = hdr.copy_number;
 	cluster_cmd_data.copy_policy = hdr.copy_policy;
+	cluster_cmd_data.block_size_shift = hdr.block_size_shift;
 	if (cluster_format(0, NULL) != SD_RES_SUCCESS)
 		goto out;
 
@@ -752,7 +761,7 @@ failure:
 static struct subcommand cluster_cmd[] = {
 	{"info", NULL, "aprhvT", "show cluster information",
 	 NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},
-	{"format", NULL, "bctaphT", "create a Sheepdog store",
+	{"format", NULL, "bctaphzT", "create a Sheepdog store",
 	 NULL, CMD_NEED_NODELIST, cluster_format, cluster_options},
 	{"shutdown", NULL, "aphT", "stop Sheepdog",
 	 NULL, 0, cluster_shutdown, cluster_options},
@@ -775,6 +784,7 @@ static struct subcommand cluster_cmd[] = {
 
 static int cluster_parser(int ch, const char *opt)
 {
+	uint32_t block_size_shift;
 	switch (ch) {
 	case 'b':
 		pstrcpy(cluster_cmd_data.name, sizeof(cluster_cmd_data.name),
@@ -802,6 +812,21 @@ static int cluster_parser(int ch, const char *opt)
 	case 't':
 		cluster_cmd_data.strict = true;
 		break;
+	case 'z':
+		block_size_shift = (uint32_t)atoi(opt);
+		if (block_size_shift > 31) {
+			sd_err("Object Size is limited to 2^31."
+			" Please set shift bit lower than 31");
+			exit(EXIT_FAILURE);
+		} else if (block_size_shift < 20) {
+			sd_err("Object Size is larger than 2^20."
+			" Please set shift bit larger than 20");
+			exit(EXIT_FAILURE);
+		}
+
+		cluster_cmd_data.block_size_shift = block_size_shift;
+
+		break;
 	}
 
 	return 0;
diff --git a/dog/farm/farm.c b/dog/farm/farm.c
index c87d488..5c8ca3b 100644
--- a/dog/farm/farm.c
+++ b/dog/farm/farm.c
@@ -299,7 +299,8 @@ out:
 }
 
 static int queue_save_snapshot_work(uint64_t oid, uint32_t nr_copies,
-				    uint8_t copy_policy, void *data)
+				    uint8_t copy_policy,
+				    uint8_t block_size_shift, void *data)
 {
 	struct snapshot_work *sw = xzalloc(sizeof(struct snapshot_work));
 	struct strbuf *trunk_buf = data;
@@ -307,6 +308,7 @@ static int queue_save_snapshot_work(uint64_t oid, uint32_t nr_copies,
 	sw->entry.oid = oid;
 	sw->entry.nr_copies = nr_copies;
 	sw->entry.copy_policy = copy_policy;
+	sw->entry.block_size_shift = block_size_shift;
 	sw->trunk_buf = trunk_buf;
 	sw->work.fn = do_save_object;
 	sw->work.done = save_object_done;
@@ -352,6 +354,7 @@ int farm_save_snapshot(const char *tag, bool multithread)
 		log_hdr.version = FARM_VERSION;
 		log_hdr.copy_number = cinfo.nr_copies;
 		log_hdr.copy_policy = cinfo.copy_policy;
+		log_hdr.block_size_shift = cinfo.block_size_shift;
 		snap_log_write_hdr(&log_hdr);
 	}
 
diff --git a/dog/farm/farm.h b/dog/farm/farm.h
index 0b86c0a..6b96c5e 100644
--- a/dog/farm/farm.h
+++ b/dog/farm/farm.h
@@ -22,7 +22,8 @@ struct trunk_entry {
 	uint64_t oid;
 	uint8_t nr_copies;
 	uint8_t copy_policy;
-	uint8_t reserved[2];
+	uint8_t block_size_shift;
+	uint8_t reserved;
 	unsigned char sha1[SHA1_DIGEST_SIZE];
 };
 
@@ -39,7 +40,8 @@ struct snap_log_hdr {
 	uint32_t version;
 	uint8_t copy_number;
 	uint8_t copy_policy;
-	uint8_t reserved[22];
+	uint8_t block_size_shift;
+	uint8_t reserved[21];
 };
 
 struct snap_log {
@@ -88,11 +90,13 @@ void *sha1_file_read(const unsigned char *sha1, size_t *size);
 
 /* object_tree.c */
 int object_tree_size(void);
-void object_tree_insert(uint64_t oid, uint32_t nr_copies, uint8_t);
+void object_tree_insert(uint64_t oid, uint32_t nr_copies,
+			uint8_t, uint8_t block_size_shift);
 void object_tree_free(void);
 void object_tree_print(void);
 int for_each_object_in_tree(int (*func)(uint64_t oid, uint32_t nr_copies,
-					uint8_t, void *data), void *data);
+					uint8_t, uint8_t block_size_shift,
+					void *data), void *data);
 /* slice.c */
 int slice_write(void *buf, size_t len, unsigned char *outsha1);
 void *slice_read(const unsigned char *sha1, size_t *outsize);
diff --git a/dog/farm/object_tree.c b/dog/farm/object_tree.c
index b90b58b..236b8df 100644
--- a/dog/farm/object_tree.c
+++ b/dog/farm/object_tree.c
@@ -18,6 +18,7 @@ struct object_tree_entry {
 	uint64_t oid;
 	uint8_t nr_copies;
 	uint8_t copy_policy;
+	uint8_t block_size_shift;
 	struct rb_node node;
 };
 
@@ -44,7 +45,8 @@ static struct object_tree_entry *do_insert(struct rb_root *root,
 	return rb_insert(root, new, node, object_tree_cmp);
 }
 
-void object_tree_insert(uint64_t oid, uint32_t nr_copies, uint8_t copy_policy)
+void object_tree_insert(uint64_t oid, uint32_t nr_copies,
+			uint8_t copy_policy, uint8_t block_size_shift)
 {
 	struct rb_root *root = &tree.root;
 	struct object_tree_entry *p = NULL;
@@ -54,6 +56,7 @@ void object_tree_insert(uint64_t oid, uint32_t nr_copies, uint8_t copy_policy)
 	cached_entry->oid = oid;
 	cached_entry->nr_copies = nr_copies;
 	cached_entry->copy_policy = copy_policy;
+	cached_entry->block_size_shift = block_size_shift;
 
 	rb_init_node(&cached_entry->node);
 	p = do_insert(root, cached_entry);
@@ -84,7 +87,8 @@ int object_tree_size(void)
 }
 
 int for_each_object_in_tree(int (*func)(uint64_t oid, uint32_t nr_copies,
-					uint8_t copy_policy, void *data),
+					uint8_t copy_policy,
+					uint8_t block_size_shift, void *data),
 			    void *data)
 {
 	struct object_tree_entry *entry;
@@ -92,7 +96,7 @@ int for_each_object_in_tree(int (*func)(uint64_t oid, uint32_t nr_copies,
 
 	rb_for_each_entry(entry, &tree.root, node) {
 		if (func(entry->oid, entry->nr_copies, entry->copy_policy,
-			 data) < 0)
+			 entry->block_size_shift, data) < 0)
 			goto out;
 	}
 	ret = 0;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 82fa167..7d5c143 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -116,6 +116,7 @@
 #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
 #define SD_OLD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * OLD_MAX_DATA_OBJS)
 #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
+#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22
 
 #define SD_INODE_SIZE (sizeof(struct sd_inode))
 #define SD_INODE_INDEX_SIZE (sizeof(uint32_t) * MAX_DATA_OBJS)
diff --git a/sheep/ops.c b/sheep/ops.c
index 44b3ed0..c76fc4e 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -283,9 +283,12 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 
 	sys->cinfo.nr_copies = req->cluster.copies;
 	sys->cinfo.copy_policy = req->cluster.copy_policy;
+	sys->cinfo.block_size_shift = req->cluster.block_size_shift;
 	sys->cinfo.flags = req->cluster.flags;
 	if (!sys->cinfo.nr_copies)
 		sys->cinfo.nr_copies = SD_DEFAULT_COPIES;
+	if (!sys->cinfo.block_size_shift)
+		sys->cinfo.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT;
 	sys->cinfo.ctime = req->cluster.ctime;
 	set_cluster_config(&sys->cinfo);
 
-- 
1.7.1




More information about the sheepdog mailing list