[sheepdog] [PATCH v4 07/10] sheep: use the specified copies number for IO requests

levin li levin108 at gmail.com
Thu Aug 9 07:27:42 CEST 2012


From: levin li <xingke.lwp at taobao.com>

In gateway_{read,write,create_and_write}_obj and {read,write}_object,
we should use the number of copies specific to each request or object,
instead of the global number of copies (sys->nr_copies) or the number
calculated from the vnodes.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/farm/trunk.c |    2 +-
 sheep/gateway.c    |    4 +-
 sheep/ops.c        |    2 +-
 sheep/recovery.c   |    4 +-
 sheep/request.c    |    2 +-
 sheep/sheep_priv.h |    6 ++--
 sheep/store.c      |    9 +++++--
 sheep/vdi.c        |   56 +++++++++++++++++++++++++++++++++------------------
 8 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index cd1fd20..1bfb6b3 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -245,7 +245,7 @@ static int oid_stale(uint64_t oid)
 	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
 
 	vinfo = get_vnode_info();
-	nr_copies = get_nr_copies(vinfo);
+	nr_copies = get_obj_copy_number(oid);
 
 	oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
 		      nr_copies, obj_vnodes);
diff --git a/sheep/gateway.c b/sheep/gateway.c
index bdbd08c..41d712b 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -35,7 +35,7 @@ int gateway_read_obj(struct request *req)
 	if (sys->enable_write_cache && !req->local && !bypass_object_cache(req))
 		return object_cache_handle_request(req);
 
-	nr_copies = get_nr_copies(req->vinfo);
+	nr_copies = get_req_copy_number(req);
 	oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
 		      nr_copies, obj_vnodes);
 	for (i = 0; i < nr_copies; i++) {
@@ -245,7 +245,7 @@ static int gateway_forward_request(struct request *req)
 
 	write_info_init(&wi);
 	wlen = hdr.data_length;
-	nr_copies = get_nr_copies(req->vinfo);
+	nr_copies = get_req_copy_number(req);
 	oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
 		      nr_copies, obj_vnodes);
 
diff --git a/sheep/ops.c b/sheep/ops.c
index ce0f8a4..faa50b5 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -664,7 +664,7 @@ static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
 	char name[128];
 	int rounded_rand, local = -1;
 
-	nr_copies = get_nr_copies(vnodes);
+	nr_copies = get_obj_copy_number(oid);
 	oid_to_vnodes(vnodes->vnodes, vnodes->nr_vnodes, oid,
 		      nr_copies, obj_vnodes);
 
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 3fdcad2..060730b 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -172,7 +172,7 @@ again:
 		oid, tgt_epoch);
 
 	/* Let's do a breadth-first search */
-	nr_copies = get_nr_copies(old);
+	nr_copies = get_obj_copy_number(oid);
 	for (i = 0; i < nr_copies; i++) {
 		struct sd_vnode *tgt_vnode = oid_to_vnode(old->vnodes,
 							  old->nr_vnodes,
@@ -515,8 +515,8 @@ static void screen_object_list(struct recovery_work *rw,
 	int nr_objs;
 	int i, j;
 
-	nr_objs = get_nr_copies(rw->cur_vinfo);
 	for (i = 0; i < nr_oids; i++) {
+		nr_objs = get_obj_copy_number(oids[i]);
 		oid_to_vnodes(rw->cur_vinfo->vnodes, rw->cur_vinfo->nr_vnodes,
 			      oids[i], nr_objs, vnodes);
 		for (j = 0; j < nr_objs; j++) {
diff --git a/sheep/request.c b/sheep/request.c
index ab7c63a..ce4315b 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -30,7 +30,7 @@ static int is_access_local(struct request *req, uint64_t oid)
 	int nr_copies;
 	int i;
 
-	nr_copies = get_nr_copies(req->vinfo);
+	nr_copies = get_req_copy_number(req);
 	oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
 		      nr_copies, obj_vnodes);
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 335e337..ec2b4dc 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -282,10 +282,10 @@ int is_recovery_init(void);
 int node_in_recovery(void);
 
 int write_object(uint64_t oid, char *data, unsigned int datalen,
-		 uint64_t offset, uint16_t flags, int create);
+		 uint64_t offset, uint16_t flags, int create, int nr_copies);
 int read_object(uint64_t oid, char *data, unsigned int datalen,
-		uint64_t offset);
-int remove_object(uint64_t oid);
+		uint64_t offset, int nr_copies);
+int remove_object(uint64_t oid, int nr_copies);
 
 int exec_local_req(struct sd_req *rq, void *data);
 void local_req_init(void);
diff --git a/sheep/store.c b/sheep/store.c
index bebb2c0..4839d13 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -548,7 +548,7 @@ int init_store(const char *d, int enable_write_cache)
  * Write data to both local object cache (if enabled) and backends
  */
 int write_object(uint64_t oid, char *data, unsigned int datalen,
-		 uint64_t offset, uint16_t flags, int create)
+		 uint64_t offset, uint16_t flags, int create, int nr_copies)
 {
 	struct sd_req hdr;
 	int ret;
@@ -576,6 +576,7 @@ forward_write:
 
 	hdr.obj.oid = oid;
 	hdr.obj.offset = offset;
+	hdr.obj.copies = nr_copies;
 
 	ret = exec_local_req(&hdr, data);
 	if (ret != SD_RES_SUCCESS)
@@ -589,7 +590,7 @@ forward_write:
  * try read backends
  */
 int read_object(uint64_t oid, char *data, unsigned int datalen,
-		uint64_t offset)
+		uint64_t offset, int nr_copies)
 {
 	struct sd_req hdr;
 	int ret;
@@ -609,6 +610,7 @@ forward_read:
 	hdr.data_length = datalen;
 	hdr.obj.oid = oid;
 	hdr.obj.offset = offset;
+	hdr.obj.copies = nr_copies;
 
 	ret = exec_local_req(&hdr, data);
 	if (ret != SD_RES_SUCCESS)
@@ -617,13 +619,14 @@ forward_read:
 	return ret;
 }
 
-int remove_object(uint64_t oid)
+int remove_object(uint64_t oid, int copies)
 {
 	struct sd_req hdr;
 	int ret;
 
 	sd_init_req(&hdr, SD_OP_REMOVE_OBJ);
 	hdr.obj.oid = oid;
+	hdr.obj.copies = copies;
 
 	ret = exec_local_req(&hdr, NULL);
 	if (ret != SD_RES_SUCCESS)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 72fbd7b..c39074f 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -300,6 +300,7 @@ int vdi_exist(uint32_t vid)
 {
 	struct sheepdog_inode *inode;
 	int ret = 1;
+	int nr_copies;
 
 	inode = zalloc(sizeof(*inode));
 	if (!inode) {
@@ -307,8 +308,10 @@ int vdi_exist(uint32_t vid)
 		goto out;
 	}
 
+	nr_copies = get_vdi_copy_number(vid);
+
 	ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
-			  sizeof(*inode), 0);
+			  sizeof(*inode), 0, nr_copies);
 	if (ret != SD_RES_SUCCESS) {
 		eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid);
 		ret = 0;
@@ -359,7 +362,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 
 	if (iocb->base_vid) {
 		ret = read_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
-				  sizeof(*base), 0);
+				  sizeof(*base), 0, iocb->nr_copies);
 		if (ret != SD_RES_SUCCESS) {
 			ret = SD_RES_BASE_VDI_READ;
 			goto out;
@@ -374,7 +377,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 				name, cur_vid, iocb->base_vid);
 
 			ret = read_object(vid_to_vdi_oid(cur_vid), (char *)cur,
-					  SD_INODE_HEADER_SIZE, 0);
+					  SD_INODE_HEADER_SIZE, 0, iocb->nr_copies);
 			if (ret != SD_RES_SUCCESS) {
 				vprintf(SDOG_ERR, "failed\n");
 				ret = SD_RES_BASE_VDI_READ;
@@ -416,7 +419,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 
 	if (iocb->is_snapshot && cur_vid != iocb->base_vid) {
 		ret = write_object(vid_to_vdi_oid(cur_vid), (char *)cur,
-				   SD_INODE_HEADER_SIZE, 0, 0, 0);
+				   SD_INODE_HEADER_SIZE, 0, 0, 0,
+				   iocb->nr_copies);
 		if (ret != 0) {
 			vprintf(SDOG_ERR, "failed\n");
 			ret = SD_RES_BASE_VDI_READ;
@@ -426,7 +430,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 
 	if (iocb->base_vid) {
 		ret = write_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
-				   SD_INODE_HEADER_SIZE, 0, 0, 0);
+				   SD_INODE_HEADER_SIZE, 0, 0, 0,
+				   iocb->nr_copies);
 		if (ret != 0) {
 			vprintf(SDOG_ERR, "failed\n");
 			ret = SD_RES_BASE_VDI_WRITE;
@@ -435,7 +440,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 	}
 
 	ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new),
-			   0, 0, 1);
+			   0, 0, 1, iocb->nr_copies);
 	if (ret != 0)
 		ret = SD_RES_VDI_WRITE;
 
@@ -455,6 +460,7 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name,
 	unsigned long i;
 	int ret = SD_RES_NO_MEM;
 	int vdi_found = 0;
+	int nr_copies;
 
 	inode = malloc(SD_INODE_HEADER_SIZE);
 	if (!inode) {
@@ -463,8 +469,9 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name,
 	}
 
 	for (i = start; i >= end; i--) {
+		nr_copies = get_vdi_copy_number(i);
 		ret = read_object(vid_to_vdi_oid(i), (char *)inode,
-				  SD_INODE_HEADER_SIZE, 0);
+				  SD_INODE_HEADER_SIZE, 0, nr_copies);
 		if (ret != SD_RES_SUCCESS) {
 			ret = SD_RES_EIO;
 			goto out_free_inode;
@@ -662,6 +669,7 @@ struct deletion_work {
 	struct request *req;
 
 	uint32_t vid;
+	int nr_copies;
 
 	int count;
 	uint32_t *buf;
@@ -681,7 +689,7 @@ static int delete_inode(struct deletion_work *dw)
 	}
 
 	ret = read_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0);
+			  SD_INODE_HEADER_SIZE, 0, dw->nr_copies);
 	if (ret != SD_RES_SUCCESS) {
 		ret = SD_RES_EIO;
 		goto out;
@@ -690,7 +698,7 @@ static int delete_inode(struct deletion_work *dw)
 	memset(inode->name, 0, sizeof(inode->name));
 
 	ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-			   SD_INODE_HEADER_SIZE, 0, 0, 0);
+			   SD_INODE_HEADER_SIZE, 0, 0, 0, dw->nr_copies);
 	if (ret != 0) {
 		ret = SD_RES_EIO;
 		goto out;
@@ -723,6 +731,7 @@ static void delete_one(struct work *work)
 	uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
 	int ret, i, nr_deleted;
 	struct sheepdog_inode *inode = NULL;
+	int nr_copies;
 
 	eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
 
@@ -732,8 +741,9 @@ static void delete_one(struct work *work)
 		goto out;
 	}
 
+	nr_copies = get_vdi_copy_number(vdi_id);
 	ret = read_object(vid_to_vdi_oid(vdi_id),
-			  (void *)inode, sizeof(*inode), 0);
+			  (void *)inode, sizeof(*inode), 0, nr_copies);
 
 	if (ret != SD_RES_SUCCESS) {
 		eprintf("cannot find VDI object\n");
@@ -757,7 +767,7 @@ static void delete_one(struct work *work)
 			continue;
 		}
 
-		ret = remove_object(oid);
+		ret = remove_object(oid, nr_copies);
 		if (ret != SD_RES_SUCCESS)
 			eprintf("remove object %" PRIx64 " fail, %d\n", oid, ret);
 
@@ -774,7 +784,7 @@ static void delete_one(struct work *work)
 	memset(inode->name, 0, sizeof(inode->name));
 
 	write_object(vid_to_vdi_oid(vdi_id), (void *)inode,
-		     sizeof(*inode), 0, 0, 0);
+		     sizeof(*inode), 0, 0, 0, nr_copies);
 out:
 	free(inode);
 }
@@ -811,6 +821,7 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
 	struct sheepdog_inode *inode = NULL;
 	int done = dw->count;
 	uint32_t vid;
+	int nr_copies;
 
 	inode = malloc(SD_INODE_HEADER_SIZE);
 	if (!inode) {
@@ -821,8 +832,9 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
 	dw->buf[dw->count++] = root_vid;
 again:
 	vid = dw->buf[done++];
+	nr_copies = get_vdi_copy_number(vid);
 	ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0);
+			  SD_INODE_HEADER_SIZE, 0, nr_copies);
 
 	if (ret != SD_RES_SUCCESS) {
 		eprintf("cannot find VDI object\n");
@@ -851,7 +863,7 @@ out:
 
 static uint64_t get_vdi_root(uint32_t vid, int *cloned)
 {
-	int ret;
+	int ret, nr_copies;
 	struct sheepdog_inode *inode = NULL;
 
 	*cloned = 0;
@@ -863,8 +875,9 @@ static uint64_t get_vdi_root(uint32_t vid, int *cloned)
 		goto out;
 	}
 next:
+	nr_copies = get_vdi_copy_number(vid);
 	ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
-			  SD_INODE_HEADER_SIZE, 0);
+			  SD_INODE_HEADER_SIZE, 0, nr_copies);
 
 	if (vid == inode->vdi_id && inode->snap_id == 1
 			&& inode->parent_vdi_id != 0
@@ -910,6 +923,7 @@ static int start_deletion(struct request *req, uint32_t vid)
 	dw->count = 0;
 	dw->vid = vid;
 	dw->req = req;
+	dw->nr_copies = get_vdi_copy_number(vid);
 
 	dw->work.fn = delete_one;
 	dw->work.done = delete_one_done;
@@ -965,7 +979,7 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
 	struct sheepdog_vdi_attr tmp_attr;
 	uint64_t oid, hval;
 	uint32_t end;
-	int ret;
+	int ret, nr_copies;
 
 	vattr->ctime = create_time;
 
@@ -979,12 +993,13 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
 	end = *attrid - 1;
 	while (*attrid != end) {
 		oid = vid_to_attr_oid(vid, *attrid);
+		nr_copies = get_vdi_copy_number(vid);
 		ret = read_object(oid, (char *)&tmp_attr,
-				  sizeof(tmp_attr), 0);
+				  sizeof(tmp_attr), 0, nr_copies);
 
 		if (ret == SD_RES_NO_OBJ && wr) {
 			ret = write_object(oid, (char *)vattr,
-					   data_len, 0, 0, 1);
+					   data_len, 0, 0, 1, nr_copies);
 			if (ret)
 				ret = SD_RES_EIO;
 			else
@@ -1005,14 +1020,15 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
 			else if (delete) {
 				ret = write_object(oid, (char *)"", 1,
 						   offsetof(struct sheepdog_vdi_attr, name),
-						   0, 0);
+						   0, 0, nr_copies);
 				if (ret)
 					ret = SD_RES_EIO;
 				else
 					ret = SD_RES_SUCCESS;
 			} else if (wr) {
 				ret = write_object(oid, (char *)vattr,
-						   SD_ATTR_OBJ_SIZE, 0, 0, 0);
+						   SD_ATTR_OBJ_SIZE, 0, 0, 0,
+						   nr_copies);
 
 				if (ret)
 					ret = SD_RES_EIO;
-- 
1.7.1




More information about the sheepdog mailing list