[sheepdog] [PATCH 1/9] add forward error correction for erasure code

Fri Sep 20 10:40:36 CEST 2013

At Thu, 19 Sep 2013 18:42:45 +0800,
Liu Yuan wrote:
> 
> This is imported and based on zfec
> 
> Signed-off-by: Liu Yuan <namei.unix at gmail.com>
> ---
>  include/Makefile.am |    2 +-
>  include/fec.h       |  170 +++++++++++++++
>  lib/Makefile.am     |    2 +-
>  lib/fec.c           |  578 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 750 insertions(+), 2 deletions(-)
>  create mode 100644 include/fec.h
>  create mode 100644 lib/fec.c
> 
> diff --git a/include/Makefile.am b/include/Makefile.am
> index 06e97a6..2c86984 100644
> --- a/include/Makefile.am
> +++ b/include/Makefile.am
> @@ -3,4 +3,4 @@ MAINTAINERCLEANFILES    = Makefile.in config.h.in
>  noinst_HEADERS          = bitops.h event.h logger.h sheepdog_proto.h util.h \
>  			  list.h net.h sheep.h exits.h strbuf.h rbtree.h \
>  			  sha1.h option.h internal_proto.h shepherd.h work.h \
> -			  sockfd_cache.h compiler.h
> +			  sockfd_cache.h compiler.h fec.h
> diff --git a/include/fec.h b/include/fec.h
> new file mode 100644
> index 0000000..e10802d
> --- /dev/null
> +++ b/include/fec.h
> @@ -0,0 +1,170 @@
> +/*
> + * zfec -- fast forward error correction library
> + *
> + * Copyright (C) 2007-2008 Allmyds, Inc.
> + * Author: Zooko Wilcox-O'Hearn
> + *
> + * This file is part of zfec.
> + *
> + * See README.rst for licensing information.
> + */
> +
> +/*
> + * Much of this work is derived from the "fec" software by Luigi Rizzo, et
> + * al., the copyright notice and licence terms of which are included below
> + * for reference.
> + *
> + * fec.h -- forward error correction based on Vandermonde matrices
> + * 980614
> + * (C) 1997-98 Luigi Rizzo (luigi at iet.unipi.it)
> + *
> + * Portions derived from code by Phil Karn (karn at ka9q.ampr.org),
> + * Robert Morelos-Zaragoza (robert at spectra.eng.hawaii.edu) and Hari
> + * Thirumoorthy (harit at spectra.eng.hawaii.edu), Aug 1995
> + *
> + * Modifications by Dan Rubenstein (see Modifications.txt for
> + * their description.
> + * Modifications (C) 1998 Dan Rubenstein (drubenst at cs.umass.edu)
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> +
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above
> + *    copyright notice, this list of conditions and the following
> + *    disclaimer in the documentation and/or other materials
> + *    provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
> + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
> + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
> + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
> + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
> + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
> + * OF SUCH DAMAGE.
> + */
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +#ifdef __GNUC__
> +#ifndef alloca
> +#define alloca(x) __builtin_alloca(x)
> +#endif
> +#else
> +#include <alloca.h>
> +#endif
> +
> +struct fec {
> +	unsigned long magic;
> +	unsigned short k, n;                     /* parameters of the code */
> +	uint8_t *enc_matrix;
> +};
> +
> +/*
> + * param k the number of blocks required to reconstruct
> + * param m the total number of blocks created
> + */
> +struct fec *fec_new(unsigned short k, unsigned short m);
> +void fec_free(struct fec *p);
> +
> +/*
> + * @param inpkts the "primary blocks" i.e. the chunks of the input data
> + * @param fecs buffers into which the secondary blocks will be written
> + * @param block_nums the numbers of the desired check blocks (the id >= k) which
> + * fec_encode() will produce and store into the buffers of the fecs parameter
> + * @param num_block_nums the length of the block_nums array
> + * @param sz size of a packet in bytes
> + */
> +void fec_encode(const struct fec *code,
> +		const uint8_t *const *const src,
> +		uint8_t *const *const fecs,
> +		const int *const block_nums,
> +		size_t num_block_nums, size_t sz);
> +
> +/*
> + * @param inpkts an array of packets (size k); If a primary block, i, is present
> + * then it must be at index i. Secondary blocks can appear anywhere.
> + * @param outpkts an array of buffers into which the reconstructed output
> + * packets will be written (only packets which are not present in the inpkts
> + * input will be reconstructed and written to outpkts)
> + * @param index an array of the blocknums of the packets in inpkts
> + * @param sz size of a packet in bytes
> + */
> +void fec_decode(const struct fec *code,
> +		const uint8_t *const *const inpkts,
> +		uint8_t *const *const outpkts,
> +		const int *const index, size_t sz);
> +
> +#define SD_EC_D	4 /* No. of data strips */
> +#define SD_EC_P 2 /* No. of parity strips */
> +#define SD_EC_DP (SD_EC_D + SD_EC_P)
> +#define SD_EC_STRIP_SIZE (1*1024) /* 1k best empirical value */
> +#define SD_EC_D_SIZE (SD_EC_STRIP_SIZE * SD_EC_D)
> +#define SD_EC_OBJECT_SIZE (SD_DATA_OBJ_SIZE / SD_EC_D)
> +#define SD_EC_STRIPE (SD_EC_STRIP_SIZE * SD_EC_DP)
> +#define STRIP_PER_OBJECT (SD_DATA_OBJ_SIZE / SD_EC_STRIP_SIZE)
> +
> +/*
> + * Stripe: data strips + parity strips, spread on all replica
> + * DS: data strip
> + * PS: parity strip
> + * R: Replica
> + *
> + *  +--------------------stripe ----------------------+
> + *  v                                                 v
> + * +----+----------------------------------------------+
> + * | ds | ds | ds | ds | ds | ... | ps | ps | ... | ps |
> + * +----+----------------------------------------------+
> + * | .. | .. | .. | .. | .. | ... | .. | .. | ... | .. |
> + * +----+----+----+----+----+ ... +----+----+-----+----+
> + *  R1    R2   R3   R4   R5   ...   Rn  Rn+1  Rn+2  Rn+3
> + */
> +
> +/* Return the erasure code context to encode|decode */
> +static inline void *ec_init(void)
> +{
> +	return fec_new(SD_EC_D, SD_EC_DP);
> +}
> +
> +/*
> + * This function decodes the data strips and return the parity strips
> + *
> + * @ds: data strips to generate parity strips
> + * @ps: parity strips to return
> + */
> +static inline void ec_encode(void *ctx, const uint8_t *ds[SD_EC_D],
> +			     uint8_t *ps[SD_EC_P])
> +{
> +	int total = SD_EC_D + SD_EC_P;
> +	const int pidx[SD_EC_P] = { total - 2, total - 1 };
> +
> +	fec_encode(ctx, ds, ps, pidx, SD_EC_P, SD_EC_STRIP_SIZE);
> +}
> +
> +/*
> + * This function takes input strips and return the lost strips
> + *
> + * @input: strips (either ds or ps) that are used to generate lost strips
> + * @output: the lost ds or ps to return
> + * @idx: indexes of each input strip in the whole stripe
> + */
> +static inline void ec_decode(void *ctx, const uint8_t *input[SD_EC_D],
> +			     uint8_t *output[],
> +			     const int idx[])
> +{
> +	fec_decode(ctx, input, output, idx, SD_EC_STRIP_SIZE);
> +}
> +
> +/* Destroy the erasure code context */
> +static inline void ec_destroy(void *ctx)
> +{
> +	fec_free(ctx);
> +}
> diff --git a/lib/Makefile.am b/lib/Makefile.am
> index 496c588..4cc1a17 100644
> --- a/lib/Makefile.am
> +++ b/lib/Makefile.am
> @@ -5,7 +5,7 @@ INCLUDES                = -I$(top_builddir)/include -I$(top_srcdir)/include
>  noinst_LIBRARIES	= libsheepdog.a
>  
>  libsheepdog_a_SOURCES	= event.c logger.c net.c util.c rbtree.c strbuf.c \
> -			  sha1.c option.c work.c sockfd_cache.c
> +			  sha1.c option.c work.c sockfd_cache.c fec.c
>  
>  if BUILD_SHA1_HW
>  libsheepdog_a_SOURCES	+= sha1_ssse3.S
> diff --git a/lib/fec.c b/lib/fec.c
> new file mode 100644
> index 0000000..331c7b7
> --- /dev/null
> +++ b/lib/fec.c
> @@ -0,0 +1,578 @@
> +/*
> + * zfec -- fast forward error correction
> + *
> + * Copyright (C) 2007-2010 Zooko Wilcox-O'Hearn
> + * Author: Zooko Wilcox-O'Hearn
> + *
> + * This file is part of zfec.
> + *
> + * Imported by Liu Yuan <namei.unix at gmail.com>
> + *
> + */
> +
> +/*
> + * This work is derived from the "fec" software by Luigi Rizzo, et al., the
> + * copyright notice and licence terms of which are included below for reference.
> + * fec.c -- forward error correction based on Vandermonde matrices 980624 (C)
> + * 1997-98 Luigi Rizzo (luigi at iet.unipi.it)
> + *
> + * Portions derived from code by Phil Karn (karn at ka9q.ampr.org),
> + * Robert Morelos-Zaragoza (robert at spectra.eng.hawaii.edu) and Hari
> + * Thirumoorthy (harit at spectra.eng.hawaii.edu), Aug 1995
> + *
> + * Modifications by Dan Rubenstein (see Modifications.txt for
> + * their description.
> + * Modifications (C) 1998 Dan Rubenstein (drubenst at cs.umass.edu)
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above
> + *    copyright notice, this list of conditions and the following
> + *    disclaimer in the documentation and/or other materials
> + *    provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
> + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
> + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
> + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
> + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
> + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> + * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
> + * OF SUCH DAMAGE.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +#include "fec.h"
> +#include "util.h"
> +
> +/*
> + * Primitive polynomials - see Lin & Costello, Appendix A,
> + * and  Lee & Messerschmitt, p. 453.
> + */
> +static const char *const Pp = "101110001";
> +
> +/*
> + * To speed up computations, we have tables for logarithm, exponent and
> + * inverse of a number.  We use a table for multiplication as well (it takes
> + * 64K, no big deal even on a PDA, especially because it can be
> + * pre-initialized an put into a ROM!), otherwhise we use a table of
> + * logarithms. In any case the macro gf_mul(x,y) takes care of
> + * multiplications.
> + */
> +
> +static uint8_t gf_exp[510];  /* idx->poly form conversion table    */
> +static int gf_log[256]; /* Poly->idx form conversion table    */
> +static uint8_t inverse[256]; /* inverse of field elem.               */
> +/* inv[\alpha**i]=\alpha**(GF_SIZE-i-1) */
> +
> +/*
> + * modnn(x) computes x % GF_SIZE, where GF_SIZE is 2**GF_BITS - 1,
> + * without a slow divide.
> + */
> +static uint8_t modnn(int x)
> +{
> +	while (x >= 255) {
> +		x -= 255;
> +		x = (x >> 8) + (x & 255);
> +	}
> +	return x;
> +}
> +
> +#define SWAP(a, b, t) { t tmp; tmp = a; a = b; b = tmp; }
> +
> +/*
> + * gf_mul(x,y) multiplies two numbers.  It is much faster to use a
> + * multiplication table.
> + *
> + * USE_GF_MULC, GF_MULC0(c) and GF_ADDMULC(x) can be used when multiplying
> + * many numbers by the same constant. In this case the first call sets the
> + * constant, and others perform the multiplications.  A value related to the
> + * multiplication is held in a local variable declared with USE_GF_MULC . See
> + * usage in _addmul1().
> + */
> +static uint8_t gf_mul_table[256][256];
> +
> +#define gf_mul(x, y) gf_mul_table[x][y]
> +
> +#define USE_GF_MULC register uint8_t *__gf_mulc_
> +
> +#define GF_MULC0(c) __gf_mulc_ = gf_mul_table[c]
> +#define GF_ADDMULC(dst, x) dst ^= __gf_mulc_[x]
> +
> +/*
> + * Generate GF(2**m) from the irreducible polynomial p(X) in p[0]..p[m]
> + * Lookup tables:
> + *     idx->polynomial form		gf_exp[] contains j= \alpha^i;
> + *     polynomial form -> idx form	gf_log[ j = \alpha^i ] = i
> + * \alpha=x is the primitive element of GF(2^m)
> + *
> + * For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple
> + * multiplication of two numbers can be resolved without calling modnn
> + */
> +static void _init_mul_table(void)
> +{
> +	int i, j;
> +	for (i = 0; i < 256; i++)
> +		for (j = 0; j < 256; j++)
> +			gf_mul_table[i][j] = gf_exp[modnn(gf_log[i] +
> +							  gf_log[j])];
> +
> +	for (j = 0; j < 256; j++)
> +		gf_mul_table[0][j] = gf_mul_table[j][0] = 0;
> +}
> +
> +#define NEW_GF_MATRIX(rows, cols) \
> +	(uint8_t *)malloc(rows * cols)

You should use xmalloc() instead of malloc(). Other callers of
malloc() should also be replaced.

Thanks,
Hitoshi