[sheepdog] [PATCH 2/2] add sha1_from_buffer()
Liu Yuan
namei.unix at gmail.com
Fri Jul 19 07:07:08 CEST 2013
On Fri, Jul 19, 2013 at 01:16:57PM +0900, MORITA Kazutaka wrote:
> From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
>
> This adds a helper function to calculate a unique sha1 digest based on
> the given buffer.
>
> This also fixes a bug that we don't use an original buffer length to
> calculate the hash value and the result is not unique.
>
> Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
> ---
> collie/farm/sha1_file.c | 22 ++--------------------
> include/sha1.h | 1 +
> lib/sha1.c | 28 ++++++++++++++++++++++++++--
> sheep/plain_store.c | 10 +---------
> 4 files changed, 30 insertions(+), 31 deletions(-)
>
> diff --git a/collie/farm/sha1_file.c b/collie/farm/sha1_file.c
> index a2f3561..d82de58 100644
> --- a/collie/farm/sha1_file.c
> +++ b/collie/farm/sha1_file.c
> @@ -28,24 +28,6 @@
> #include "farm.h"
> #include "util.h"
>
> -static void get_sha1(unsigned char *buf, unsigned len, unsigned char *sha1)
> -{
> - struct sha1_ctx c;
> - uint64_t offset = 0;
> - uint32_t length = len;
> - void *tmp = valloc(length);
> -
> - memcpy(tmp, buf, len);
> - trim_zero_blocks(tmp, &offset, &length);
> -
> - sha1_init(&c);
> - sha1_update(&c, (uint8_t *)&offset, sizeof(offset));
> - sha1_update(&c, (uint8_t *)&length, sizeof(length));
> - sha1_update(&c, tmp, length);
> - sha1_final(&c, sha1);
> - free(tmp);
> -}
> -
> static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
> {
> int i;
> @@ -159,7 +141,7 @@ int sha1_file_write(void *buf, size_t len, unsigned char *outsha1)
> {
> unsigned char sha1[SHA1_DIGEST_SIZE];
>
> - get_sha1(buf, len, sha1);
> + sha1_from_buffer(buf, len, sha1);
> if (sha1_buffer_write(sha1, buf, len) < 0)
> return -1;
> if (outsha1)
> @@ -172,7 +154,7 @@ static int verify_sha1_file(const unsigned char *sha1,
> {
> unsigned char tmp[SHA1_DIGEST_SIZE];
>
> - get_sha1(buf, len, tmp);
> + sha1_from_buffer(buf, len, tmp);
> if (memcmp((char *)tmp, (char *)sha1, SHA1_DIGEST_SIZE) != 0) {
> fprintf(stderr, "failed, %s != %s\n", sha1_to_hex(sha1),
> sha1_to_hex(tmp));
> diff --git a/include/sha1.h b/include/sha1.h
> index dd1b4f4..a778aea 100644
> --- a/include/sha1.h
> +++ b/include/sha1.h
> @@ -27,5 +27,6 @@ void sha1_init(void *ctx);
> void sha1_update(void *ctx, const uint8_t *data, unsigned int len);
> void sha1_final(void *ctx, uint8_t *out);
> const char *sha1_to_hex(const unsigned char *sha1);
> +void sha1_from_buffer(const void *buf, size_t size, unsigned char *sha1);
>
> #endif
> diff --git a/lib/sha1.c b/lib/sha1.c
> index c1ada09..34d29b8 100644
> --- a/lib/sha1.c
> +++ b/lib/sha1.c
> @@ -19,6 +19,7 @@
> */
> #include <arpa/inet.h>
> #include "sha1.h"
> +#include "util.h"
>
> #define SHA1_DIGEST_SIZE 20
> #define SHA1_HMAC_BLOCK_SIZE 64
> @@ -129,13 +130,13 @@ static void sha1_transform(uint32_t *state, const uint8_t *in)
> void sha1_init(void *ctx)
> {
> struct sha1_ctx *sctx = ctx;
> - static const struct sha1_ctx initstate = {
> + static const struct sha1_ctx init_state = {
> 0,
> { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 },
> { 0, }
> };
>
> - *sctx = initstate;
> + *sctx = init_state;
> }
>
> void sha1_update(void *ctx, const uint8_t *data, unsigned int len)
> @@ -212,3 +213,26 @@ const char *sha1_to_hex(const unsigned char *sha1)
> }
> return buffer;
> }
> +
> +/*
> + * Calculate a sha1 message digest based on the content of 'buf'
> + *
> + * This calculates a unique sha1 digest faster than the naive calculation when
> + * the content of 'buf' is sparse. The result will be set in 'sha1'.
> + */
> +void sha1_from_buffer(const void *buf, size_t size, unsigned char *sha1)
> +{
> + struct sha1_ctx c;
> + uint64_t offset = 0;
> + uint32_t length = size;
> +
> + sha1_init(&c);
> + sha1_update(&c, (uint8_t *)&length, sizeof(length));
> +
> + find_zero_blocks(buf, &offset, &length);
> +
> + sha1_update(&c, (uint8_t *)&length, sizeof(length));
> + sha1_update(&c, (uint8_t *)&offset, sizeof(offset));
> + sha1_update(&c, buf, length);
> + sha1_final(&c, sha1);
> +}
Why this is faster? sha1_update(&c, buf, length) will try to hash the original
buf besides double hashing of length & offset, no?
Thanks
Yuan
More information about the sheepdog
mailing list