[sheepdog] [PATCH 2/2] add sha1_from_buffer()

MORITA Kazutaka morita.kazutaka at gmail.com
Fri Jul 19 06:16:57 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

This adds a helper function to calculate a unique sha1 digest based on
the given buffer.

This also fixes a bug where the original buffer length was not included
in the hash calculation, so the resulting digest was not unique.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 collie/farm/sha1_file.c |   22 ++--------------------
 include/sha1.h          |    1 +
 lib/sha1.c              |   28 ++++++++++++++++++++++++++--
 sheep/plain_store.c     |   10 +---------
 4 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/collie/farm/sha1_file.c b/collie/farm/sha1_file.c
index a2f3561..d82de58 100644
--- a/collie/farm/sha1_file.c
+++ b/collie/farm/sha1_file.c
@@ -28,24 +28,6 @@
 #include "farm.h"
 #include "util.h"
 
-static void get_sha1(unsigned char *buf, unsigned len, unsigned char *sha1)
-{
-	struct sha1_ctx c;
-	uint64_t offset = 0;
-	uint32_t length = len;
-	void *tmp = valloc(length);
-
-	memcpy(tmp, buf, len);
-	trim_zero_blocks(tmp, &offset, &length);
-
-	sha1_init(&c);
-	sha1_update(&c, (uint8_t *)&offset, sizeof(offset));
-	sha1_update(&c, (uint8_t *)&length, sizeof(length));
-	sha1_update(&c, tmp, length);
-	sha1_final(&c, sha1);
-	free(tmp);
-}
-
 static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
 {
 	int i;
@@ -159,7 +141,7 @@ int sha1_file_write(void *buf, size_t len, unsigned char *outsha1)
 {
 	unsigned char sha1[SHA1_DIGEST_SIZE];
 
-	get_sha1(buf, len, sha1);
+	sha1_from_buffer(buf, len, sha1);
 	if (sha1_buffer_write(sha1, buf, len) < 0)
 		return -1;
 	if (outsha1)
@@ -172,7 +154,7 @@ static int verify_sha1_file(const unsigned char *sha1,
 {
 	unsigned char tmp[SHA1_DIGEST_SIZE];
 
-	get_sha1(buf, len, tmp);
+	sha1_from_buffer(buf, len, tmp);
 	if (memcmp((char *)tmp, (char *)sha1, SHA1_DIGEST_SIZE) != 0) {
 		fprintf(stderr, "failed, %s != %s\n", sha1_to_hex(sha1),
 			sha1_to_hex(tmp));
diff --git a/include/sha1.h b/include/sha1.h
index dd1b4f4..a778aea 100644
--- a/include/sha1.h
+++ b/include/sha1.h
@@ -27,5 +27,6 @@ void sha1_init(void *ctx);
 void sha1_update(void *ctx, const uint8_t *data, unsigned int len);
 void sha1_final(void *ctx, uint8_t *out);
 const char *sha1_to_hex(const unsigned char *sha1);
+void sha1_from_buffer(const void *buf, size_t size, unsigned char *sha1);
 
 #endif
diff --git a/lib/sha1.c b/lib/sha1.c
index c1ada09..34d29b8 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -19,6 +19,7 @@
  */
 #include <arpa/inet.h>
 #include "sha1.h"
+#include "util.h"
 
 #define SHA1_DIGEST_SIZE	20
 #define SHA1_HMAC_BLOCK_SIZE	64
@@ -129,13 +130,13 @@ static void sha1_transform(uint32_t *state, const uint8_t *in)
 void sha1_init(void *ctx)
 {
 	struct sha1_ctx *sctx = ctx;
-	static const struct sha1_ctx initstate = {
+	static const struct sha1_ctx init_state = {
 	  0,
 	  { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 },
 	  { 0, }
 	};
 
-	*sctx = initstate;
+	*sctx = init_state;
 }
 
 void sha1_update(void *ctx, const uint8_t *data, unsigned int len)
@@ -212,3 +213,26 @@ const char *sha1_to_hex(const unsigned char *sha1)
 	}
 	return buffer;
 }
+
+/*
+ * Calculate a sha1 message digest based on the content of 'buf'
+ *
+ * This calculates a unique sha1 digest faster than the naive calculation when
+ * the content of 'buf' is sparse.  The result will be set in 'sha1'.
+ */
+void sha1_from_buffer(const void *buf, size_t size, unsigned char *sha1)
+{
+	struct sha1_ctx c;
+	uint64_t offset = 0;
+	uint32_t length = size;
+
+	sha1_init(&c);
+	sha1_update(&c, (uint8_t *)&length, sizeof(length));
+
+	find_zero_blocks(buf, &offset, &length);
+
+	sha1_update(&c, (uint8_t *)&length, sizeof(length));
+	sha1_update(&c, (uint8_t *)&offset, sizeof(offset));
+	sha1_update(&c, buf, length);
+	sha1_final(&c, sha1);
+}
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 4854be8..bebe2f3 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -527,8 +527,6 @@ int default_get_hash(uint64_t oid, uint32_t epoch, uint8_t *sha1)
 	int ret;
 	void *buf;
 	struct siocb iocb = {};
-	struct sha1_ctx c;
-	uint64_t offset = 0;
 	uint32_t length;
 	bool is_readonly_obj = oid_is_readonly(oid);
 	char path[PATH_MAX];
@@ -560,13 +558,7 @@ int default_get_hash(uint64_t oid, uint32_t epoch, uint8_t *sha1)
 		return ret;
 	}
 
-	trim_zero_blocks(buf, &offset, &length);
-
-	sha1_init(&c);
-	sha1_update(&c, (uint8_t *)&offset, sizeof(offset));
-	sha1_update(&c, (uint8_t *)&length, sizeof(length));
-	sha1_update(&c, buf, length);
-	sha1_final(&c, sha1);
+	sha1_from_buffer(buf, length, sha1);
 	free(buf);
 
 	sd_dprintf("the message digest of %"PRIx64" at epoch %d is %s", oid,
-- 
1.7.9.5




More information about the sheepdog mailing list