[sheepdog] [PATCH RESEND 2/6] farm: remove sha1_hdr

Kai Zhang kyle at zelin.io
Wed Jun 5 12:35:10 CEST 2013


This patch is motivated by the goal of speeding up snapshot saving.

We can speed up saving a snapshot by reading the sha1 value first.
Because 'farm' saves data under a file name equal to its sha1 value, it is easy to check whether an object with that sha1 value already exists in the local path.

However, the current algorithm for computing sha1 differs between 'sheep' and 'farm' in two ways:
1. farm saves data with a sha1_hdr, but sheep doesn't
2. sheep trims data before calculating sha1

This patch removes sha1_hdr from the sha1 file.

There are 2 other reasons that support this idea:
1. Currently, sha1_hdr contains a tag that distinguishes the file type: data, trunk, or snap.
   However, this is not necessary. We can find a snap via snap_log, and a trunk via its snap.
   We don't need to determine the type of a file by reading its header.
2. Different types of data should not share the same hdr, which has a different meaning in different places.

Signed-off-by: Kai Zhang <kyle at zelin.io>
---
 collie/farm/farm.c      |   47 +++++++++++-------------
 collie/farm/farm.h      |   31 ++++++++--------
 collie/farm/sha1_file.c |   90 ++++++++++++++++++-----------------------------
 collie/farm/snap.c      |   39 +++++---------------
 collie/farm/trunk.c     |   67 +++++++++++-----------------------
 5 files changed, 102 insertions(+), 172 deletions(-)

diff --git a/collie/farm/farm.c b/collie/farm/farm.c
index 072ee16..cb14c8b 100644
--- a/collie/farm/farm.c
+++ b/collie/farm/farm.c
@@ -153,8 +153,7 @@ static int get_trunk_sha1(uint32_t idx, const char *tag, unsigned char *outsha1)
 {
 	int nr_logs = -1, ret = -1;
 	struct snap_log *log_buf, *log_free = NULL;
-	void *snap_buf = NULL;
-	struct sha1_file_hdr hdr;
+	struct snap_file *snap_buf = NULL;
 
 	log_free = log_buf = snap_log_read(&nr_logs);
 	if (nr_logs < 0)
@@ -163,10 +162,10 @@ static int get_trunk_sha1(uint32_t idx, const char *tag, unsigned char *outsha1)
 	for (int i = 0; i < nr_logs; i++, log_buf++) {
 		if (log_buf->idx != idx && strcmp(log_buf->tag, tag))
 			continue;
-		snap_buf = snap_file_read(log_buf->sha1, &hdr);
+		snap_buf = snap_file_read(log_buf->sha1);
 		if (!snap_buf)
 			goto out;
-		memcpy(outsha1, snap_buf, SHA1_LEN);
+		memcpy(outsha1, snap_buf->trunk_sha1, SHA1_LEN);
 		ret = 0;
 		goto out;
 	}
@@ -220,33 +219,27 @@ bool farm_contain_snapshot(uint32_t idx, const char *tag)
 
 static void do_save_object(struct work *work)
 {
-	void *sha1_buf, *data_buf;
-	size_t sha1_size, data_size;
+	void *buf;
+	size_t size;
 	struct snapshot_work *sw;
-	struct sha1_file_hdr *hdr;
 
 	if (uatomic_is_true(&work_error))
 		return;
 
 	sw = container_of(work, struct snapshot_work, work);
-	data_size = get_objsize(sw->entry.oid);
-	sha1_size = data_size + sizeof(struct sha1_file_hdr);
-	hdr = sha1_buf = xmalloc(sha1_size);
-	data_buf = (char *)sha1_buf + sizeof(struct sha1_file_hdr);
+	size = get_objsize(sw->entry.oid);
+	buf = xmalloc(size);
 
-	if (sd_read_object(sw->entry.oid, data_buf, data_size, 0, true) < 0)
+	if (sd_read_object(sw->entry.oid, buf, size, 0, true) < 0)
 		goto error;
 
-	memcpy(hdr->tag, TAG_DATA, TAG_LEN);
-	hdr->size = data_size;
-
-	if (sha1_file_write(sha1_buf, sha1_size, sw->entry.sha1) < 0)
+	if (sha1_file_write(buf, size, sw->entry.sha1) < 0)
 		goto error;
 
-	free(sha1_buf);
+	free(buf);
 	return;
 error:
-	free(sha1_buf);
+	free(buf);
 	fprintf(stderr, "Fail to save object, oid %"PRIu64"\n",
 		sw->entry.oid);
 	uatomic_set_true(&work_error);
@@ -284,9 +277,10 @@ int farm_save_snapshot(const char *tag)
 {
 	unsigned char snap_sha1[SHA1_LEN];
 	unsigned char trunk_sha1[SHA1_LEN];
-	struct strbuf trunk_entries = STRBUF_INIT;
+	struct strbuf trunk_buf;
 	void *snap_log = NULL;
 	int log_nr, idx, ret = -1;
+	uint64_t nr_objects = object_tree_size();
 
 	snap_log = snap_log_read(&log_nr);
 	if (!snap_log)
@@ -294,16 +288,19 @@ int farm_save_snapshot(const char *tag)
 
 	idx = log_nr + 1;
 
+	strbuf_init(&trunk_buf, sizeof(struct trunk_entry) * nr_objects);
+
 	wq = create_work_queue("save snapshot", WQ_ORDERED);
 	if (for_each_object_in_tree(queue_save_snapshot_work,
-				    (void *)&trunk_entries) < 0)
+				    (void *)&trunk_buf) < 0)
 		goto out;
 
 	work_queue_wait(wq);
 	if (uatomic_is_true(&work_error))
 		goto out;
 
-	if (trunk_file_write(trunk_sha1, &trunk_entries) < 0)
+	if (trunk_file_write(nr_objects, (struct trunk_entry *)trunk_buf.buf,
+			     trunk_sha1) < 0)
 		goto out;
 
 	if (snap_file_write(idx, trunk_sha1, snap_sha1) < 0)
@@ -314,15 +311,15 @@ int farm_save_snapshot(const char *tag)
 
 	ret = 0;
 out:
+	strbuf_release(&trunk_buf);
 	free(snap_log);
-	strbuf_release(&trunk_entries);
 	return ret;
 }
 
 static void do_load_object(struct work *work)
 {
 	void *buffer = NULL;
-	struct sha1_file_hdr hdr;
+	size_t size;
 	struct snapshot_work *sw;
 
 	if (uatomic_is_true(&work_error))
@@ -330,12 +327,12 @@ static void do_load_object(struct work *work)
 
 	sw = container_of(work, struct snapshot_work, work);
 
-	buffer = sha1_file_read(sw->entry.sha1, &hdr);
+	buffer = sha1_file_read(sw->entry.sha1, &size);
 
 	if (!buffer)
 		goto error;
 
-	if (sd_write_object(sw->entry.oid, 0, buffer, hdr.size, 0, 0,
+	if (sd_write_object(sw->entry.oid, 0, buffer, size, 0, 0,
 			    sw->entry.nr_copies, true, true) != 0)
 		goto error;
 
diff --git a/collie/farm/farm.h b/collie/farm/farm.h
index 10ba597..5a81987 100644
--- a/collie/farm/farm.h
+++ b/collie/farm/farm.h
@@ -20,22 +20,20 @@
 #include "strbuf.h"
 #include "sha1.h"
 
-#define TAG_LEN         6
-#define TAG_DATA        "data\0\0"
-#define TAG_TRUNK       "trunk\0"
-#define TAG_SNAP        "snap\0\0"
-
 struct trunk_entry {
 	uint64_t oid;
 	int nr_copies;
 	unsigned char sha1[SHA1_LEN];
 };
 
-struct sha1_file_hdr {
-	char tag[TAG_LEN];
-	uint64_t size;
-	uint64_t priv;
-	uint64_t reserved;
+struct trunk_file {
+	uint64_t nr_entries;
+	struct trunk_entry *entries;
+};
+
+struct snap_file {
+	int idx;
+	unsigned char trunk_sha1[SHA1_LEN];
 };
 
 /* farm.c */
@@ -47,23 +45,24 @@ char *get_object_directory(void);
 
 /* trunk.c */
 int trunk_init(void);
-int trunk_file_write(unsigned char *trunk_sha1, struct strbuf *trunk_entries);
-void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *);
+int trunk_file_write(uint64_t nr_entries, struct trunk_entry *entries,
+		     unsigned char *trunk_sha1);
+struct trunk_file *trunk_file_read(unsigned char *sha1);
 int for_each_entry_in_trunk(unsigned char *trunk_sha1,
 			    int (*func)(struct trunk_entry *entry, void *data),
 			    void *data);
 
 /* snap.c */
 int snap_init(const char *path);
-void *snap_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr);
-int snap_file_write(uint32_t idx, unsigned char *trunksha1,
+struct snap_file *snap_file_read(unsigned char *sha1);
+int snap_file_write(uint32_t idx, unsigned char *trunk_sha1,
 		    unsigned char *outsha1);
 void *snap_log_read(int *out_nr);
 int snap_log_write(uint32_t idx, const char *tag, unsigned char *sha1);
 
 /* sha1_file.c */
-int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *);
-void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *);
+int sha1_file_write(unsigned char *buf, size_t len, unsigned char *sha1);
+void *sha1_file_read(const unsigned char *sha1, size_t *size);
 int get_sha1_hex(const char *hex, unsigned char *sha1);
 int sha1_file_try_delete(const unsigned char *sha1);
 
diff --git a/collie/farm/sha1_file.c b/collie/farm/sha1_file.c
index 2d94673..2cd7416 100644
--- a/collie/farm/sha1_file.c
+++ b/collie/farm/sha1_file.c
@@ -132,7 +132,7 @@ err_open:
 	return ret;
 }
 
-int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *outsha1)
+int sha1_file_write(unsigned char *buf, size_t len, unsigned char *outsha1)
 {
 	unsigned char sha1[SHA1_LEN];
 	struct sha1_ctx c;
@@ -148,50 +148,6 @@ int sha1_file_write(unsigned char *buf, unsigned len, unsigned char *outsha1)
 	return 0;
 }
 
-static void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
-{
-	char *filename = sha1_to_path(sha1);
-	int fd = open(filename, O_RDONLY);
-	struct stat st;
-	void *map;
-
-	if (fd < 0) {
-		perror(filename);
-		return NULL;
-	}
-	if (fstat(fd, &st) < 0) {
-		fprintf(stderr, "%m\n");
-		close(fd);
-		return NULL;
-	}
-	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
-	close(fd);
-	if (map == MAP_FAILED) {
-		fprintf(stderr, "%m\n");
-		return NULL;
-	}
-	*size = st.st_size;
-	return map;
-}
-
-static void *unpack_sha1_file(void *map, unsigned long mapsize,
-			      struct sha1_file_hdr *hdr)
-{
-	int hdr_len;
-	char *buf;
-
-	memcpy(hdr, map, sizeof(*hdr));
-	hdr_len = sizeof(*hdr);
-	buf = valloc(hdr->size);
-	if (!buf) {
-		fprintf(stderr, "%m\n");
-		return NULL;
-	}
-
-	memcpy(buf, (char *)map + hdr_len, mapsize - hdr_len);
-	return buf;
-}
-
 static int verify_sha1_file(const unsigned char *sha1,
 			    void *buf, unsigned long len)
 {
@@ -210,20 +166,42 @@ static int verify_sha1_file(const unsigned char *sha1,
 	return 0;
 }
 
-void *sha1_file_read(const unsigned char *sha1, struct sha1_file_hdr *hdr)
+void *sha1_file_read(const unsigned char *sha1, size_t *size)
 {
-	unsigned long mapsize;
-	void *map, *buf;
+	char *filename = sha1_to_path(sha1);
+	int fd = open(filename, O_RDONLY);
+	struct stat st;
+	void *buf = NULL;
+
+	if (fd < 0) {
+		perror(filename);
+		return NULL;
+	}
+	if (fstat(fd, &st) < 0) {
+		fprintf(stderr, "%m\n");
+		goto out;
+	}
+
+	buf = xmalloc(st.st_size);
+	if (!buf)
+		goto out;
 
-	map = map_sha1_file(sha1, &mapsize);
-	if (map) {
-		if (verify_sha1_file(sha1, map, mapsize) < 0)
-			return NULL;
-		buf = unpack_sha1_file(map, mapsize, hdr);
-		munmap(map, mapsize);
-		return buf;
+	if (xread(fd, buf, st.st_size) != st.st_size) {
+		free(buf);
+		buf = NULL;
+		goto out;
 	}
-	return NULL;
+
+	if (verify_sha1_file(sha1, buf, st.st_size) < 0) {
+		free(buf);
+		buf = NULL;
+		goto out;
+	}
+
+	*size = st.st_size;
+out:
+	close(fd);
+	return buf;
 }
 
 int sha1_file_try_delete(const unsigned char *sha1)
diff --git a/collie/farm/snap.c b/collie/farm/snap.c
index f28601a..a341cd1 100644
--- a/collie/farm/snap.c
+++ b/collie/farm/snap.c
@@ -109,40 +109,19 @@ out:
 	return buffer;
 }
 
-void *snap_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr)
+struct snap_file *snap_file_read(unsigned char *sha1)
 {
-	void *buffer = NULL;
-
-	buffer = sha1_file_read(sha1, outhdr);
-	if (!buffer)
-		return NULL;
-	if (strcmp(outhdr->tag, TAG_SNAP) != 0) {
-		free(buffer);
-		return NULL;
-	}
-
-	return buffer;
+	size_t size;
+	return (struct snap_file *)sha1_file_read(sha1, &size);
 }
 
-int snap_file_write(uint32_t idx, unsigned char *trunksha1,
+int snap_file_write(uint32_t idx, unsigned char *trunk_sha1,
 		    unsigned char *outsha1)
 {
-	int ret = -1;
-	struct sha1_file_hdr hdr = {};
-	struct strbuf buf = STRBUF_INIT;
+	struct snap_file snap;
+	snap.idx = idx;
+	memcpy(snap.trunk_sha1, trunk_sha1, SHA1_LEN);
 
-	memcpy(hdr.tag, TAG_SNAP, TAG_LEN);
-	hdr.size = SHA1_LEN;
-	hdr.priv = idx;
-	hdr.reserved = 0;
-
-	strbuf_add(&buf, &hdr, sizeof(hdr));
-	strbuf_add(&buf, trunksha1, SHA1_LEN);
-	if (sha1_file_write((void *)buf.buf, buf.len, outsha1) < 0)
-		goto out;
-
-	ret = 0;
-out:
-	strbuf_release(&buf);
-	return ret;
+	return sha1_file_write((void *)&snap, sizeof(struct snap_file),
+			       outsha1);
 }
diff --git a/collie/farm/trunk.c b/collie/farm/trunk.c
index c2f5bbf..7d30f92 100644
--- a/collie/farm/trunk.c
+++ b/collie/farm/trunk.c
@@ -28,72 +28,49 @@
 #include "util.h"
 #include "sheepdog_proto.h"
 
-int trunk_file_write(unsigned char *trunk_sha1, struct strbuf *trunk_entries)
+int trunk_file_write(uint64_t nr_entries, struct trunk_entry *entries,
+		     unsigned char *trunk_sha1)
 {
-	struct strbuf buf;
-	struct sha1_file_hdr hdr = {};
-	uint64_t data_size, object_nr = 0;
-	int ret = -1;
-
-	/* Init trunk hdr */
-	object_nr = object_tree_size();
-	data_size = sizeof(struct trunk_entry) * object_nr;
-	hdr.size = data_size;
-	hdr.priv = object_nr;
-	memcpy(hdr.tag, TAG_TRUNK, TAG_LEN);
-	strbuf_init(&buf, sizeof(hdr) + data_size);
-	strbuf_add(&buf, &hdr, sizeof(hdr));
-
-	/* trunk entries */
-	strbuf_addbuf(&buf, trunk_entries);
-
-	/* write to sha1 file */
-	if (sha1_file_write((void *)buf.buf, buf.len, trunk_sha1) < 0)
-		goto out;
-
-	ret = 0;
-out:
-	strbuf_release(&buf);
-	return ret;
+	size_t size = sizeof(struct trunk_entry) * nr_entries;
+	return sha1_file_write((void *)entries, size, trunk_sha1);
 }
 
-void *trunk_file_read(unsigned char *sha1, struct sha1_file_hdr *outhdr)
+struct trunk_file *trunk_file_read(unsigned char *sha1)
 {
-	void *buffer;
+	size_t size;
+	struct trunk_file *trunk = NULL;
+	void *buf = sha1_file_read(sha1, &size);
 
-	buffer = sha1_file_read(sha1, outhdr);
-	if (!buffer)
+	if (!buf)
 		return NULL;
-	if (strcmp(outhdr->tag, TAG_TRUNK) != 0) {
-		free(buffer);
-		return NULL;
-	}
+	trunk = xmalloc(sizeof(struct trunk_file));
+	trunk->nr_entries = size / sizeof(struct trunk_entry);
+	trunk->entries = (struct trunk_entry *)buf;
 
-	return buffer;
+	return trunk;
 }
 
 int for_each_entry_in_trunk(unsigned char *trunk_sha1,
 			    int (*func)(struct trunk_entry *entry, void *data),
 			    void *data)
 {
-	struct trunk_entry *trunk_entry, *trunk_free = NULL;
-	struct sha1_file_hdr trunk_hdr;
-	uint64_t nr_trunks;
+	struct trunk_file *trunk;
+	struct trunk_entry *entry;
 	int ret = -1;
 
-	trunk_free = trunk_entry = trunk_file_read(trunk_sha1, &trunk_hdr);
-
-	if (!trunk_entry)
+	trunk = trunk_file_read(trunk_sha1);
+	if (!trunk)
 		goto out;
 
-	nr_trunks = trunk_hdr.priv;
-	for (uint64_t i = 0; i < nr_trunks; i++, trunk_entry++) {
-		if (func(trunk_entry, data) < 0)
+	entry = trunk->entries;
+	for (uint64_t i = 0; i < trunk->nr_entries; i++, entry++) {
+		if (func(entry, data) < 0)
 			goto out;
 	}
 
 	ret = 0;
 out:
-	free(trunk_free);
+	free(trunk->entries);
+	free(trunk);
 	return ret;
 }
-- 
1.7.1




More information about the sheepdog mailing list