[sheepdog] [PATCH v2 1/4] erasure: make ec_helpers more generic

Liu Yuan namei.unix at gmail.com
Thu Oct 17 20:05:58 CEST 2013


This will allow us to pass dynamic data and parity combination to make different
erasure coding possible.

At most we support 7 parity strip, which means we can stand with 7 nodes failure
at the same time.

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 include/fec.h    |   41 +++++-----
 lib/fec.c        |  230 ++++++++++++++++++++++++++++--------------------------
 sheep/gateway.c  |   61 ++++++++-------
 sheep/recovery.c |    9 ++-
 4 files changed, 179 insertions(+), 162 deletions(-)

diff --git a/include/fec.h b/include/fec.h
index ff79edb..55e0bdb 100644
--- a/include/fec.h
+++ b/include/fec.h
@@ -61,16 +61,16 @@
 
 struct fec {
 	unsigned long magic;
-	unsigned short k, n;                     /* parameters of the code */
+	unsigned short d, dp;                     /* parameters of the code */
 	uint8_t *enc_matrix;
 };
 
 void init_fec(void);
 /*
- * param k the number of blocks required to reconstruct
- * param m the total number of blocks created
+ * param d the number of blocks required to reconstruct
+ * param dp the total number of blocks created
  */
-struct fec *fec_new(unsigned short k, unsigned short m);
+struct fec *fec_new(unsigned short d, unsigned short dp);
 void fec_free(struct fec *p);
 
 /*
@@ -104,7 +104,6 @@ void fec_decode(const struct fec *code,
 #define SD_EC_D	4 /* No. of data strips */
 #define SD_EC_P 2 /* No. of parity strips */
 #define SD_EC_DP (SD_EC_D + SD_EC_P)
-#define SD_EC_STRIP_SIZE (256)
 
 /*
  * SD_EC_D_SIZE <= 1K is the safe value to run VM after some experimentations.
@@ -115,10 +114,9 @@ void fec_decode(const struct fec *code,
  * failed (grub got screwed) and 1K is probably the biggest value if we want
  * VM to run on erasure coded volume.
  */
-#define SD_EC_D_SIZE (SD_EC_STRIP_SIZE * SD_EC_D)
+#define SD_EC_DATA_STRIPE_SIZE (1024) /* 1K */
 #define SD_EC_OBJECT_SIZE (SD_DATA_OBJ_SIZE / SD_EC_D)
-#define SD_EC_STRIPE (SD_EC_STRIP_SIZE * SD_EC_DP)
-#define SD_EC_NR_STRIPE_PER_OBJECT (SD_EC_OBJECT_SIZE / SD_EC_STRIP_SIZE)
+#define SD_EC_NR_STRIPE_PER_OBJECT (SD_DATA_OBJ_SIZE / SD_EC_DATA_STRIPE_SIZE)
 
 /*
  * Stripe: data strips + parity strips, spread on all replica
@@ -127,19 +125,19 @@ void fec_decode(const struct fec *code,
  * R: Replica
  *
  *  +--------------------stripe ----------------------+
- *  v                                                 v
- * +----+----------------------------------------------+
+ *  v   data stripe                   parity stripe   v
+ * +----+----+----+----+----+-----+----+----+-----+----+
  * | ds | ds | ds | ds | ds | ... | ps | ps | ... | ps |
- * +----+----------------------------------------------+
+ * +----+----+----+----+----+-----+----+----+-----+----+
  * | .. | .. | .. | .. | .. | ... | .. | .. | ... | .. |
  * +----+----+----+----+----+ ... +----+----+-----+----+
  *  R1    R2   R3   R4   R5   ...   Rn  Rn+1  Rn+2  Rn+3
  */
 
 /* Return the erasure code context to encode|decode */
-static inline struct fec *ec_init(void)
+static inline struct fec *ec_init(int d, int dp)
 {
-	return fec_new(SD_EC_D, SD_EC_DP);
+	return fec_new(d, dp);
 }
 
 /*
@@ -148,13 +146,16 @@ static inline struct fec *ec_init(void)
  * @ds: data strips to generate parity strips
  * @ps: parity strips to return
  */
-static inline void ec_encode(struct fec *ctx, const uint8_t *ds[SD_EC_D],
-			     uint8_t *ps[SD_EC_P])
+static inline void ec_encode(struct fec *ctx, const uint8_t *ds[],
+			     uint8_t *ps[])
 {
-	int total = SD_EC_D + SD_EC_P;
-	const int pidx[SD_EC_P] = { total - 2, total - 1 };
+	int p = ctx->dp - ctx->d;
+	int pidx[p];
 
-	fec_encode(ctx, ds, ps, pidx, SD_EC_P, SD_EC_STRIP_SIZE);
+	for (int i = 0; i < p; i++)
+		pidx[i] = ctx->d + i;
+
+	fec_encode(ctx, ds, ps, pidx, p, SD_EC_DATA_STRIPE_SIZE / ctx->d);
 }
 
 /*
@@ -166,8 +167,8 @@ static inline void ec_encode(struct fec *ctx, const uint8_t *ds[SD_EC_D],
  * @output: the lost ds or ps to return
  * @idx: index of output which is lost
  */
-void ec_decode(struct fec *ctx, const uint8_t *input[SD_EC_D],
-	       const int inidx[SD_EC_D],
+void ec_decode(struct fec *ctx, const uint8_t *input[],
+	       const int inidx[],
 	       uint8_t output[], int idx);
 
 /* Destroy the erasure code context */
diff --git a/lib/fec.c b/lib/fec.c
index 6deafe4..05dffa0 100644
--- a/lib/fec.c
+++ b/lib/fec.c
@@ -251,18 +251,18 @@ static void _addmul1(register uint8_t *dst,
 		GF_ADDMULC(*dst, *src);
 }
 
-/* computes C = AB where A is n*k, B is k*m, C is n*m */
-static void _matmul(uint8_t *a, uint8_t *b, uint8_t *c, unsigned n, unsigned k,
+/* computes C = AB where A is dp*d, B is d*m, C is dp*m */
+static void _matmul(uint8_t *a, uint8_t *b, uint8_t *c, unsigned dp, unsigned d,
 		    unsigned m)
 {
 	unsigned row, col, i;
 
-	for (row = 0; row < n; row++) {
+	for (row = 0; row < dp; row++) {
 		for (col = 0; col < m; col++) {
-			uint8_t *pa = &a[row * k];
+			uint8_t *pa = &a[row * d];
 			uint8_t *pb = &b[col];
 			uint8_t acc = 0;
-			for (i = 0; i < k; i++, pa++, pb += m)
+			for (i = 0; i < d; i++, pa++, pb += m)
 				acc ^= gf_mul(*pa, *pb);
 			c[row * m + col] = acc;
 		}
@@ -271,43 +271,43 @@ static void _matmul(uint8_t *a, uint8_t *b, uint8_t *c, unsigned n, unsigned k,
 
 /*
  * _invert_mat() takes a matrix and produces its inverse
- * k is the size of the matrix.
+ * d is the size of the matrix.
  * (Gauss-Jordan, adapted from Numerical Recipes in C)
  * Return non-zero if singular.
  */
-static void _invert_mat(uint8_t *src, unsigned k)
+static void _invert_mat(uint8_t *src, unsigned d)
 {
 	uint8_t c, *p;
 	unsigned irow = 0;
 	unsigned icol = 0;
 	unsigned row, col, i, ix;
 
-	unsigned *indxc = (unsigned *)xmalloc(k * sizeof(unsigned));
-	unsigned *indxr = (unsigned *)xmalloc(k * sizeof(unsigned));
-	unsigned *ipiv = (unsigned *)xmalloc(k * sizeof(unsigned));
-	uint8_t *id_row = NEW_GF_MATRIX(1, k);
+	unsigned *indxc = (unsigned *)xmalloc(d * sizeof(unsigned));
+	unsigned *indxr = (unsigned *)xmalloc(d * sizeof(unsigned));
+	unsigned *ipiv = (unsigned *)xmalloc(d * sizeof(unsigned));
+	uint8_t *id_row = NEW_GF_MATRIX(1, d);
 
-	memset(id_row, '\0', k * sizeof(uint8_t));
+	memset(id_row, '\0', d * sizeof(uint8_t));
 	/* ipiv marks elements already used as pivots. */
-	for (i = 0; i < k; i++)
+	for (i = 0; i < d; i++)
 		ipiv[i] = 0;
 
-	for (col = 0; col < k; col++) {
+	for (col = 0; col < d; col++) {
 		uint8_t *pivot_row;
 		/*
 		 * Zeroing column 'col', look for a non-zero element.
 		 * First try on the diagonal, if it fails, look elsewhere.
 		 */
-		if (ipiv[col] != 1 && src[col * k + col] != 0) {
+		if (ipiv[col] != 1 && src[col * d + col] != 0) {
 			irow = col;
 			icol = col;
 			goto found_piv;
 		}
-		for (row = 0; row < k; row++) {
+		for (row = 0; row < d; row++) {
 			if (ipiv[row] != 1) {
-				for (ix = 0; ix < k; ix++) {
+				for (ix = 0; ix < d; ix++) {
 					if (ipiv[ix] == 0) {
-						if (src[row * k + ix] != 0) {
+						if (src[row * d + ix] != 0) {
 							irow = row;
 							icol = ix;
 							goto found_piv;
@@ -325,11 +325,11 @@ found_piv:
 		 * optimizing.
 		 */
 		if (irow != icol)
-			for (ix = 0; ix < k; ix++)
-				SWAP(src[irow*k + ix], src[icol*k + ix]);
+			for (ix = 0; ix < d; ix++)
+				SWAP(src[irow*d + ix], src[icol*d + ix]);
 		indxr[col] = irow;
 		indxc[col] = icol;
-		pivot_row = &src[icol * k];
+		pivot_row = &src[icol * d];
 		c = pivot_row[icol];
 		assert(c != 0);
 		if (c != 1) {   /* otherwhise this is a NOP */
@@ -339,7 +339,7 @@ found_piv:
 			 */
 			c = inverse[c];
 			pivot_row[icol] = 1;
-			for (ix = 0; ix < k; ix++)
+			for (ix = 0; ix < d; ix++)
 				pivot_row[ix] = gf_mul(c, pivot_row[ix]);
 		}
 		/*
@@ -350,22 +350,22 @@ found_piv:
 		 * we can optimize the addmul).
 		 */
 		id_row[icol] = 1;
-		if (memcmp(pivot_row, id_row, k * sizeof(uint8_t)) != 0) {
-			for (p = src, ix = 0; ix < k; ix++, p += k) {
+		if (memcmp(pivot_row, id_row, d * sizeof(uint8_t)) != 0) {
+			for (p = src, ix = 0; ix < d; ix++, p += d) {
 				if (ix != icol) {
 					c = p[icol];
 					p[icol] = 0;
-					addmul(p, pivot_row, c, k);
+					addmul(p, pivot_row, c, d);
 				}
 			}
 		}
 		id_row[icol] = 0;
 	}                           /* done all columns */
-	for (col = k; col > 0; col--)
+	for (col = d; col > 0; col--)
 		if (indxr[col-1] != indxc[col-1])
-			for (row = 0; row < k; row++)
-				SWAP(src[row * k + indxr[col-1]],
-				     src[row * k + indxc[col-1]]);
+			for (row = 0; row < d; row++)
+				SWAP(src[row * d + indxr[col-1]],
+				     src[row * d + indxc[col-1]]);
 }
 
 /*
@@ -379,51 +379,51 @@ found_piv:
  * p = coefficients of the matrix (p_i)
  * q = values of the polynomial (known)
  */
-static void _invert_vdm(uint8_t *src, unsigned k)
+static void _invert_vdm(uint8_t *src, unsigned d)
 {
 	unsigned i, j, row, col;
 	uint8_t *b, *c, *p;
 	uint8_t t, xx;
 
-	if (k == 1)     /* degenerate case, matrix must be p^0 = 1 */
+	if (d == 1)     /* degenerate case, matrix must be p^0 = 1 */
 		return;
 	/*
-	 * c holds the coefficient of P(x) = Prod (x - p_i), i=0..k-1
+	 * c holds the coefficient of P(x) = Prod (x - p_i), i=0..d-1
 	 * b holds the coefficient for the matrix inversion
 	 */
-	c = NEW_GF_MATRIX(1, k);
-	b = NEW_GF_MATRIX(1, k);
-	p = NEW_GF_MATRIX(1, k);
+	c = NEW_GF_MATRIX(1, d);
+	b = NEW_GF_MATRIX(1, d);
+	p = NEW_GF_MATRIX(1, d);
 
-	for (j = 1, i = 0; i < k; i++, j += k) {
+	for (j = 1, i = 0; i < d; i++, j += d) {
 		c[i] = 0;
 		p[i] = src[j];            /* p[i] */
 	}
 	/*
-	 * construct coeffs. recursively. We know c[k] = 1 (implicit)
+	 * construct coeffs. recursively. We know c[d] = 1 (implicit)
 	 * and start P_0 = x - p_0, then at each stage multiply by
 	 * x - p_i generating P_i = x P_{i-1} - p_i P_{i-1}
-	 * After k steps we are done.
+	 * After d steps we are done.
 	 */
-	c[k - 1] = p[0];              /* really -p(0), but x = -x in GF(2^m) */
-	for (i = 1; i < k; i++) {
+	c[d - 1] = p[0];              /* really -p(0), but x = -x in GF(2^m) */
+	for (i = 1; i < d; i++) {
 		uint8_t p_i = p[i];            /* see above comment */
-		for (j = k - 1 - (i - 1); j < k - 1; j++)
+		for (j = d - 1 - (i - 1); j < d - 1; j++)
 			c[j] ^= gf_mul(p_i, c[j + 1]);
-		c[k - 1] ^= p_i;
+		c[d - 1] ^= p_i;
 	}
 
-	for (row = 0; row < k; row++) {
+	for (row = 0; row < d; row++) {
 		/* synthetic division etc. */
 		xx = p[row];
 		t = 1;
-		b[k - 1] = 1;             /* this is in fact c[k] */
-		for (i = k - 1; i > 0; i--) {
+		b[d - 1] = 1;             /* this is in fact c[d] */
+		for (i = d - 1; i > 0; i--) {
 			b[i-1] = c[i] ^ gf_mul(xx, b[i]);
 			t = gf_mul(xx, t) ^ b[i-1];
 		}
-		for (col = 0; col < k; col++)
-			src[col * k + row] = gf_mul(inverse[t], b[col]);
+		for (col = 0; col < d; col++)
+			src[col * d + row] = gf_mul(inverse[t], b[col]);
 	}
 	free(c);
 	free(b);
@@ -447,13 +447,13 @@ void init_fec(void)
 
 void fec_free(struct fec *p)
 {
-	assert(p != NULL && p->magic == (((FEC_MAGIC ^ p->k) ^ p->n) ^
+	assert(p != NULL && p->magic == (((FEC_MAGIC ^ p->d) ^ p->dp) ^
 					 (unsigned long) (p->enc_matrix)));
 	free(p->enc_matrix);
 	free(p);
 }
 
-struct fec *fec_new(unsigned short k, unsigned short n)
+struct fec *fec_new(unsigned short d, unsigned short dp)
 {
 	unsigned row, col;
 	uint8_t *p, *tmp_m;
@@ -461,32 +461,32 @@ struct fec *fec_new(unsigned short k, unsigned short n)
 	struct fec *retval;
 
 	retval = (struct fec *)xmalloc(sizeof(struct fec));
-	retval->k = k;
-	retval->n = n;
-	retval->enc_matrix = NEW_GF_MATRIX(n, k);
-	retval->magic = ((FEC_MAGIC^k)^n)^(unsigned long)(retval->enc_matrix);
-	tmp_m = NEW_GF_MATRIX(n, k);
+	retval->d = d;
+	retval->dp = dp;
+	retval->enc_matrix = NEW_GF_MATRIX(dp, d);
+	retval->magic = ((FEC_MAGIC^d)^dp)^(unsigned long)(retval->enc_matrix);
+	tmp_m = NEW_GF_MATRIX(dp, d);
 	/*
 	 * fill the matrix with powers of field elements, starting from 0.
 	 * The first row is special, cannot be computed with exp. table.
 	 */
 	tmp_m[0] = 1;
-	for (col = 1; col < k; col++)
+	for (col = 1; col < d; col++)
 		tmp_m[col] = 0;
-	for (p = tmp_m + k, row = 0; row < n - 1; row++, p += k)
-		for (col = 0; col < k; col++)
+	for (p = tmp_m + d, row = 0; row < dp - 1; row++, p += d)
+		for (col = 0; col < d; col++)
 			p[col] = gf_exp[modnn(row * col)];
 
 	/*
 	 * quick code to build systematic matrix: invert the top
-	 * k*k vandermonde matrix, multiply right the bottom n-k rows
+	 * d*d vandermonde matrix, multiply right the bottom dp-d rows
 	 * by the inverse, and construct the identity matrix at the top.
 	 */
-	_invert_vdm(tmp_m, k);        /* much faster than _invert_mat */
-	_matmul(tmp_m + k * k, tmp_m, retval->enc_matrix + k * k, n - k, k, k);
+	_invert_vdm(tmp_m, d);        /* much faster than _invert_mat */
+	_matmul(tmp_m + d * d, tmp_m, retval->enc_matrix + d * d, dp - d, d, d);
 	/* the upper matrix is I so do not bother with a slow multiply */
-	memset(retval->enc_matrix, '\0', k * k * sizeof(uint8_t));
-	for (p = retval->enc_matrix, col = 0; col < k; col++, p += k + 1)
+	memset(retval->enc_matrix, '\0', d * d * sizeof(uint8_t));
+	for (p = retval->enc_matrix, col = 0; col < d; col++, p += d + 1)
 		*p = 1;
 	free(tmp_m);
 
@@ -508,19 +508,19 @@ void fec_encode(const struct fec *code,
 		size_t num_block_nums, size_t sz)
 {
 	unsigned char i, j;
-	size_t k;
+	size_t d;
 	unsigned fecnum;
 	const uint8_t *p;
 
-	for (k = 0; k < sz; k += STRIDE) {
-		size_t stride = ((sz-k) < STRIDE) ? (sz-k) : STRIDE;
+	for (d = 0; d < sz; d += STRIDE) {
+		size_t stride = ((sz-d) < STRIDE) ? (sz-d) : STRIDE;
 		for (i = 0; i < num_block_nums; i++) {
 			fecnum = block_nums[i];
-			assert(fecnum >= code->k);
-			memset(fecs[i]+k, 0, stride);
-			p = &(code->enc_matrix[fecnum * code->k]);
-			for (j = 0; j < code->k; j++)
-				addmul(fecs[i]+k, src[j]+k, p[j], stride);
+			assert(fecnum >= code->d);
+			memset(fecs[i]+d, 0, stride);
+			p = &(code->enc_matrix[fecnum * code->d]);
+			for (j = 0; j < code->d; j++)
+				addmul(fecs[i]+d, src[j]+d, p[j], stride);
 		}
 	}
 }
@@ -528,24 +528,24 @@ void fec_encode(const struct fec *code,
 /*
  * Build decode matrix into some memory space.
  *
- * @param matrix a space allocated for a k by k matrix
+ * @param matrix a space allocated for a d by d matrix
  */
 static void
 build_decode_matrix_into_space(const struct fec *const code,
 			       const int *const idx,
-			       const unsigned k, uint8_t *const matrix)
+			       const unsigned d, uint8_t *const matrix)
 {
 	unsigned char i;
 	uint8_t *p;
-	for (i = 0, p = matrix; i < k; i++, p += k) {
-		if (idx[i] < k) {
-			memset(p, 0, k);
+	for (i = 0, p = matrix; i < d; i++, p += d) {
+		if (idx[i] < d) {
+			memset(p, 0, d);
 			p[i] = 1;
 		} else {
-			memcpy(p, &(code->enc_matrix[idx[i] * code->k]), k);
+			memcpy(p, &(code->enc_matrix[idx[i] * code->d]), d);
 		}
 	}
-	_invert_mat(matrix, k);
+	_invert_mat(matrix, d);
 }
 
 void fec_decode(const struct fec *code,
@@ -553,25 +553,25 @@ void fec_decode(const struct fec *code,
 		uint8_t *const *const outpkts,
 		const int *const idx, size_t sz)
 {
-	uint8_t m_dec[code->k * code->k];
+	uint8_t m_dec[code->d * code->d];
 	unsigned char outix = 0;
 	unsigned char row = 0;
 	unsigned char col = 0;
 
-	assert(code->k * code->k < 8 * 1024 * 1024);
-	build_decode_matrix_into_space(code, idx, code->k, m_dec);
+	assert(code->d * code->d < 8 * 1024 * 1024);
+	build_decode_matrix_into_space(code, idx, code->d, m_dec);
 
-	for (row = 0; row < code->k; row++) {
+	for (row = 0; row < code->d; row++) {
 		/*
 		 * If the block whose number is i is present, then it is
 		 * required to be in the i'th element.
 		 */
-		assert((idx[row] >= code->k) || (idx[row] == row));
-		if (idx[row] >= code->k) {
+		assert((idx[row] >= code->d) || (idx[row] == row));
+		if (idx[row] >= code->d) {
 			memset(outpkts[outix], 0, sz);
-			for (col = 0; col < code->k; col++)
+			for (col = 0; col < code->d; col++)
 				addmul(outpkts[outix], inpkts[col],
-				       m_dec[row * code->k + col], sz);
+				       m_dec[row * code->d + col], sz);
 			outix++;
 		}
 	}
@@ -584,19 +584,20 @@ void fec_decode(const struct fec *code,
  *
  * Return out and outidx as fec_decode requested.
  */
-static inline void decode_prepare(const uint8_t *dp[], const uint8_t *out[],
+static inline void decode_prepare(struct fec *ctx, const uint8_t *dp[],
+				  const uint8_t *out[],
 				  int outidx[])
 {
 	int i, p = 0;
 
-	for (i = SD_EC_D; i < SD_EC_DP; i++) {
+	for (i = ctx->d; i < ctx->dp; i++) {
 		if (dp[i]) {
 			p = i;
 			break;
 		}
 	}
 
-	for (i = 0; i < SD_EC_D; i++) {
+	for (i = 0; i < ctx->d; i++) {
 		if (dp[i]) {
 			out[i] = dp[i];
 			outidx[i] = i;
@@ -608,9 +609,9 @@ static inline void decode_prepare(const uint8_t *dp[], const uint8_t *out[],
 	}
 }
 
-static inline bool data_is_missing(const uint8_t *dp[])
+static inline bool data_is_missing(const uint8_t *dp[], int d)
 {
-	for (int i = 0; i < SD_EC_D; i++)
+	for (int i = 0; i < d; i++)
 		if (!dp[i])
 			return true;
 	return false;
@@ -625,39 +626,50 @@ static inline bool data_is_missing(const uint8_t *dp[])
  * @output: the lost ds or ps to return
  * @idx: index of output which is lost
  */
-void ec_decode(struct fec *ctx, const uint8_t *input[SD_EC_D],
-	       const int inidx[SD_EC_D],
+void ec_decode(struct fec *ctx, const uint8_t *input[], const int inidx[],
 	       uint8_t output[], int idx)
 {
-	const uint8_t *dp[SD_EC_DP] = { NULL };
-	const uint8_t *oin[SD_EC_D] = { NULL };
-	int oidx[SD_EC_D] = { 0 }, i;
-	uint8_t m0[SD_EC_STRIP_SIZE], m1[SD_EC_STRIP_SIZE],
-		p0[SD_EC_STRIP_SIZE], p1[SD_EC_STRIP_SIZE];
-	uint8_t *missing[SD_EC_P] = { m0, m1 };
-	uint8_t *p[SD_EC_P] = { p0, p1 };
-
-	for (i = 0; i < SD_EC_D; i++)
+	int edp = ctx->dp, ep = ctx->dp - ctx->d, ed = ctx->d;
+	const uint8_t *dp[edp];
+	const uint8_t *oin[ed];
+	int oidx[ed], i;
+	int strip_size = SD_EC_DATA_STRIPE_SIZE / ed;
+	uint8_t m0[strip_size], m1[strip_size], m2[strip_size], m3[strip_size],
+		m4[strip_size], m5[strip_size], m6[strip_size], m7[strip_size],
+		p0[strip_size], p1[strip_size], p2[strip_size], p3[strip_size],
+		p4[strip_size], p5[strip_size], p6[strip_size], p7[strip_size];
+#define SD_EC_MAX_PARITY 8
+	uint8_t *missing[SD_EC_MAX_PARITY] = { m0, m1, m2, m3, m4, m5, m6, m7 };
+	uint8_t *p[SD_EC_MAX_PARITY] = { p0, p1, p2, p3, p4, p5, p6, p7 };
+
+	for (i = 0; i < edp; i++)
+		dp[i] = NULL;
+	for (i = 0; i < ed; i++)
+		oin[i] = NULL;
+	for (i = 0; i < ed; i++)
+		oidx[i] = 0;
+
+	for (i = 0; i < ed; i++)
 		dp[inidx[i]] = input[i];
 
-	decode_prepare(dp, oin, oidx);
+	decode_prepare(ctx, dp, oin, oidx);
 
 	/* Fill the data strip if missing */
-	if (data_is_missing(dp)) {
+	if (data_is_missing(dp, ed)) {
 		int m = 0;
-		fec_decode(ctx, oin, missing, oidx, SD_EC_STRIP_SIZE);
-		for (i = 0; i < SD_EC_D; i++)
+		fec_decode(ctx, oin, missing, oidx, strip_size);
+		for (i = 0; i < ed; i++)
 			if (!dp[i])
 				dp[i] = missing[m++];
 	}
 
-	if (idx < SD_EC_D)
+	if (idx < ed)
 		goto out;
 
 	/* Fill the parity strip */
 	ec_encode(ctx, dp, p);
-	for (i = 0; i < SD_EC_P; i++)
-		dp[SD_EC_D + i] = p[i];
+	for (i = 0; i < ep; i++)
+		dp[ed + i] = p[i];
 out:
-	memcpy(output, dp[idx], SD_EC_STRIP_SIZE);
+	memcpy(output, dp[idx], strip_size);
 }
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 08b9c1e..4d7e0e2 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -48,11 +48,12 @@ static struct req_iter *prepare_replication_requests(struct request *req,
 }
 
 /*
- * Make sure we don't overwrite the existing data for unaligned write
+ * Make sure we don't overwrite the existing data for misaligned write
  *
- * If either offset or length of request isn't aligned to SD_EC_D_SIZE, we have
- * to read the unaligned blocks before write. This kind of write amplification
- * indeed slow down the write operation with extra read overhead.
+ * If either offset or length of request isn't aligned to
+ * SD_EC_DATA_STRIPE_SIZE, we have to read the unaligned blocks before write.
+ * This kind of write amplification indeed slow down the write operation with
+ * extra read overhead.
  */
 static void *init_erasure_buffer(struct request *req, int buf_len)
 {
@@ -62,18 +63,18 @@ static void *init_erasure_buffer(struct request *req, int buf_len)
 	uint64_t oid = req->rq.obj.oid;
 	int opcode = req->rq.opcode;
 	struct sd_req hdr;
-	uint64_t head = round_down(off, SD_EC_D_SIZE);
-	uint64_t tail = round_down(off + len, SD_EC_D_SIZE);
+	uint64_t head = round_down(off, SD_EC_DATA_STRIPE_SIZE);
+	uint64_t tail = round_down(off + len, SD_EC_DATA_STRIPE_SIZE);
 	int ret;
 
 	if (opcode != SD_OP_WRITE_OBJ)
 		goto out;
 
-	if (off % SD_EC_D_SIZE) {
+	if (off % SD_EC_DATA_STRIPE_SIZE) {
 		/* Read head */
 		sd_init_req(&hdr, SD_OP_READ_OBJ);
 		hdr.obj.oid = oid;
-		hdr.data_length = SD_EC_D_SIZE;
+		hdr.data_length = SD_EC_DATA_STRIPE_SIZE;
 		hdr.obj.offset = head;
 		ret = exec_local_req(&hdr, buf);
 		if (ret != SD_RES_SUCCESS) {
@@ -82,11 +83,11 @@ static void *init_erasure_buffer(struct request *req, int buf_len)
 		}
 	}
 
-	if ((len + off) % SD_EC_D_SIZE && tail - head > 0) {
+	if ((len + off) % SD_EC_DATA_STRIPE_SIZE && tail - head > 0) {
 		/* Read tail */
 		sd_init_req(&hdr, SD_OP_READ_OBJ);
 		hdr.obj.oid = oid;
-		hdr.data_length = SD_EC_D_SIZE;
+		hdr.data_length = SD_EC_DATA_STRIPE_SIZE;
 		hdr.obj.offset = tail;
 		ret = exec_local_req(&hdr, buf + tail - head);
 		if (ret != SD_RES_SUCCESS) {
@@ -95,7 +96,7 @@ static void *init_erasure_buffer(struct request *req, int buf_len)
 		}
 	}
 out:
-	memcpy(buf + off % SD_EC_D_SIZE, req->data, len);
+	memcpy(buf + off % SD_EC_DATA_STRIPE_SIZE, req->data, len);
 	return buf;
 }
 
@@ -108,11 +109,12 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr)
 	uint32_t len = req->rq.data_length;
 	uint64_t off = req->rq.obj.offset;
 	int opcode = req->rq.opcode;
-	int start = off / SD_EC_D_SIZE;
-	int end = DIV_ROUND_UP(off + len, SD_EC_D_SIZE), i, j;
+	int start = off / SD_EC_DATA_STRIPE_SIZE;
+	int end = DIV_ROUND_UP(off + len, SD_EC_DATA_STRIPE_SIZE), i, j;
 	int nr_stripe = end - start;
-	struct fec *ctx = ec_init();
+	struct fec *ctx = ec_init(SD_EC_D, SD_EC_DP);
 	int nr_to_send = (opcode == SD_OP_READ_OBJ) ? SD_EC_D : SD_EC_DP;
+	int strip_size = SD_EC_DATA_STRIPE_SIZE / SD_EC_D;
 	struct req_iter *reqs = xzalloc(sizeof(*reqs) * nr_to_send);
 	char *p, *buf = NULL;
 
@@ -121,11 +123,11 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr)
 
 	*nr = nr_to_send;
 	for (i = 0; i < nr_to_send; i++) {
-		int l = SD_EC_STRIP_SIZE * nr_stripe;
+		int l = strip_size * nr_stripe;
 
 		reqs[i].buf = xmalloc(l);
 		reqs[i].dlen = l;
-		reqs[i].off = start * SD_EC_STRIP_SIZE;
+		reqs[i].off = start * strip_size;
 		switch (opcode) {
 		case SD_OP_CREATE_AND_WRITE_OBJ:
 		case SD_OP_WRITE_OBJ:
@@ -139,7 +141,7 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr)
 	if (opcode != SD_OP_WRITE_OBJ && opcode != SD_OP_CREATE_AND_WRITE_OBJ)
 		goto out; /* Read and remove operation */
 
-	p = buf = init_erasure_buffer(req, SD_EC_D_SIZE * nr_stripe);
+	p = buf = init_erasure_buffer(req, SD_EC_DATA_STRIPE_SIZE * nr_stripe);
 	if (!buf) {
 		sd_err("failed to init erasure buffer %"PRIx64,
 		       req->rq.obj.oid);
@@ -152,16 +154,16 @@ static struct req_iter *prepare_erasure_requests(struct request *req, int *nr)
 		uint8_t *ps[SD_EC_P];
 
 		for (j = 0; j < SD_EC_D; j++)
-			ds[j] = reqs[j].buf + SD_EC_STRIP_SIZE * i;
+			ds[j] = reqs[j].buf + strip_size * i;
 
 		for (j = 0; j < SD_EC_P; j++)
-			ps[j] = reqs[SD_EC_D + j].buf + SD_EC_STRIP_SIZE * i;
+			ps[j] = reqs[SD_EC_D + j].buf + strip_size * i;
 
 		for (j = 0; j < SD_EC_D; j++)
-			memcpy((uint8_t *)ds[j], p + j * SD_EC_STRIP_SIZE,
-			       SD_EC_STRIP_SIZE);
+			memcpy((uint8_t *)ds[j], p + j * strip_size,
+			       strip_size);
 		ec_encode(ctx, ds, ps);
-		p += SD_EC_D_SIZE;
+		p += SD_EC_DATA_STRIPE_SIZE;
 	}
 out:
 	ec_destroy(ctx);
@@ -212,8 +214,8 @@ static void finish_requests(struct request *req, struct req_iter *reqs,
 	uint32_t len = req->rq.data_length;
 	uint64_t off = req->rq.obj.offset;
 	int opcode = req->rq.opcode;
-	int start = off / SD_EC_D_SIZE;
-	int end = DIV_ROUND_UP(off + len, SD_EC_D_SIZE), i, j;
+	int start = off / SD_EC_DATA_STRIPE_SIZE;
+	int end = DIV_ROUND_UP(off + len, SD_EC_DATA_STRIPE_SIZE), i, j;
 	int nr_stripe = end - start;
 
 	if (!is_erasure_oid(oid))
@@ -224,17 +226,18 @@ static void finish_requests(struct request *req, struct req_iter *reqs,
 
 	/* We need to assemble the data strips into the req buffer for read */
 	if (opcode == SD_OP_READ_OBJ) {
-		char *p, *buf = xmalloc(SD_EC_D_SIZE * nr_stripe);
+		char *p, *buf = xmalloc(SD_EC_DATA_STRIPE_SIZE * nr_stripe);
+		int strip_size = SD_EC_DATA_STRIPE_SIZE / SD_EC_D;
 
 		p = buf;
 		for (i = 0; i < nr_stripe; i++) {
 			for (j = 0; j < nr_to_send; j++) {
-				memcpy(p, reqs[j].buf + SD_EC_STRIP_SIZE * i,
-				       SD_EC_STRIP_SIZE);
-				p += SD_EC_STRIP_SIZE;
+				memcpy(p, reqs[j].buf + strip_size * i,
+				       strip_size);
+				p += strip_size;
 			}
 		}
-		memcpy(req->data, buf + off % SD_EC_D_SIZE, len);
+		memcpy(req->data, buf + off % SD_EC_DATA_STRIPE_SIZE, len);
 		req->rp.data_length = req->rq.data_length;
 		free(buf);
 	}
diff --git a/sheep/recovery.c b/sheep/recovery.c
index e63e3d6..037d2c3 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -392,7 +392,7 @@ static void *rebuild_erasure_object(uint64_t oid, uint8_t idx,
 {
 	uint8_t *bufs[SD_EC_D] = { 0 };
 	int idxs[SD_EC_D], len = get_store_objsize(oid);
-	struct fec *ctx = ec_init();
+	struct fec *ctx = ec_init(SD_EC_D, SD_EC_DP);
 	char *lost = xvalloc(len);
 	int i, j;
 
@@ -414,12 +414,13 @@ static void *rebuild_erasure_object(uint64_t oid, uint8_t idx,
 	/* Rebuild the lost replica */
 	for (i = 0; i < SD_EC_NR_STRIPE_PER_OBJECT; i++) {
 		const uint8_t *in[SD_EC_D];
-		uint8_t out[SD_EC_STRIP_SIZE];
+		int strip_size = SD_EC_DATA_STRIPE_SIZE / SD_EC_D;
+		uint8_t out[strip_size];
 
 		for (j = 0; j < SD_EC_D; j++)
-			in[j] = bufs[j] + SD_EC_STRIP_SIZE * i;
+			in[j] = bufs[j] + strip_size * i;
 		ec_decode(ctx, in, idxs, out, idx);
-		memcpy(lost + SD_EC_STRIP_SIZE * i, out, SD_EC_STRIP_SIZE);
+		memcpy(lost + strip_size * i, out, strip_size);
 	}
 out:
 	ec_destroy(ctx);
-- 
1.7.9.5




More information about the sheepdog mailing list