[sheepdog] [PATCH v2 02/21] sheep: use ec_index to remember strip placement

Liu Yuan namei.unix at gmail.com
Wed Oct 16 07:50:28 CEST 2013


ec_index is used to tag the replica for data striping, since for erasure coding
each replica is indexed as a different strip holder for the object.

We have no room for ec_index in struct sd_req, so we make use of
'uint64_t offset', of which we effectively only use the first 32 bits. By
squeezing ec_index into this field, we won't change the layout of sd_req.

Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
 include/sheepdog_proto.h |    6 ++++--
 sheep/gateway.c          |    1 +
 sheep/object_cache.c     |    2 +-
 sheep/ops.c              |    1 +
 sheep/plain_store.c      |   22 +++++++++++++++++++---
 sheep/sheep_priv.h       |    3 ++-
 6 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 04399bc..7138608 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -127,9 +127,11 @@ struct sd_req {
 			uint64_t	cow_oid;
 			uint8_t		copies;
 			uint8_t		copy_policy;
-			uint8_t		reserved[2];
+			uint8_t		ec_index;
+			uint8_t		reserved;
 			uint32_t	tgt_epoch;
-			uint64_t	offset;
+			uint32_t	offset;
+			uint32_t	__pad;
 		} obj;
 		struct {
 			uint64_t	vdi_size;
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 1660f0b..a2dc7e4 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -498,6 +498,7 @@ static int gateway_forward_request(struct request *req)
 		hdr.data_length = reqs[i].dlen;
 		wlen = reqs[i].wlen;
 		hdr.obj.offset = reqs[i].off;
+		hdr.obj.ec_index = i;
 		ret = send_req(sfd->fd, &hdr, reqs[i].buf, wlen,
 			       sheep_need_retry, req->rq.epoch,
 			       MAX_RETRY_COUNT);
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index bd714f3..6ad5642 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -1099,7 +1099,7 @@ int object_cache_handle_request(struct request *req)
 	int ret;
 	bool create = false;
 
-	sd_debug("%08" PRIx32 ", len %" PRIu32 ", off %" PRIu64, idx,
+	sd_debug("%08" PRIx32 ", len %" PRIu32 ", off %" PRIu32, idx,
 		 hdr->data_length, hdr->obj.offset);
 
 	cache = find_object_cache(vid, true);
diff --git a/sheep/ops.c b/sheep/ops.c
index 5206faf..7e12c5c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -969,6 +969,7 @@ static int peer_create_and_write_obj(struct request *req)
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.epoch = epoch;
 	iocb.length = get_objsize(oid);
+	iocb.ec_index = hdr->obj.ec_index;
 	if (hdr->flags & SD_FLAG_CMD_COW) {
 		sd_debug("%" PRIx64 ", %" PRIx64, oid, hdr->obj.cow_oid);
 
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 3ab707a..0eb955a 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -128,7 +128,7 @@ int default_write(uint64_t oid, const struct siocb *iocb)
 	size = xpwrite(fd, iocb->buf, iocb->length, iocb->offset);
 	if (unlikely(size != iocb->length)) {
 		sd_err("failed to write object %"PRIx64", path=%s, offset=%"
-		       PRId64", size=%"PRId32", result=%zd, %m", oid, path,
+		       PRId32", size=%"PRId32", result=%zd, %m", oid, path,
 		       iocb->offset, iocb->length, size);
 		ret = err_to_sderr(path, oid, errno);
 		goto out;
@@ -248,7 +248,7 @@ static int default_read_from_path(uint64_t oid, const char *path,
 	size = xpread(fd, iocb->buf, iocb->length, iocb->offset);
 	if (unlikely(size != iocb->length)) {
 		sd_err("failed to read object %"PRIx64", path=%s, offset=%"
-		       PRId64", size=%"PRId32", result=%zd, %m", oid, path,
+		       PRId32", size=%"PRId32", result=%zd, %m", oid, path,
 		       iocb->offset, iocb->length, size);
 		ret = err_to_sderr(path, oid, errno);
 	}
@@ -300,6 +300,17 @@ static size_t get_store_objsize(uint64_t oid)
 	return get_objsize(oid);
 }
 
+#define ECNAME "user.ec.index"
+#define ECSIZE sizeof(uint8_t)
+static int set_erasure_index(const char *path, uint8_t idx)
+{
+	if (setxattr(path, ECNAME, &idx, ECSIZE, 0) < 0) {
+		sd_err("failed to setxattr %s, %m", path);
+		return -1;
+	}
+	return 0;
+}
+
 int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 {
 	char path[PATH_MAX], tmp_path[PATH_MAX];
@@ -307,6 +318,7 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 	int ret, fd;
 	uint32_t len = iocb->length;
 
+	sd_debug("%"PRIx64, oid);
 	get_obj_path(oid, path, sizeof(path));
 	get_tmp_obj_path(oid, tmp_path, sizeof(tmp_path));
 
@@ -359,7 +371,11 @@ int default_create_and_write(uint64_t oid, const struct siocb *iocb)
 		ret = err_to_sderr(path, oid, errno);
 		goto out;
 	}
-	sd_debug("%"PRIx64, oid);
+	if (is_erasure_oid(oid) &&
+	    set_erasure_index(path, iocb->ec_index) < 0) {
+		ret = err_to_sderr(path, oid, errno);
+		goto out;
+	}
 	ret = SD_RES_SUCCESS;
 out:
 	if (ret != SD_RES_SUCCESS)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 634efc6..9b83883 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -162,7 +162,8 @@ struct siocb {
 	uint32_t epoch;
 	void *buf;
 	uint32_t length;
-	uint64_t offset;
+	uint32_t offset;
+	uint8_t ec_index;
 };
 
 /* This structure is used to pass parameters to vdi_* functions. */
-- 
1.7.9.5




More information about the sheepdog mailing list