[sheepdog] [PATCH 5/5] sheep: fix nested requests for the same FD

Liu Yuan namei.unix at gmail.com
Tue Jun 19 05:55:18 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

Consider the following flow:

forward_write_obj_req
  ->get_sheep_fd()  <---384 fd
  ->send_req()  <--req 1
  ->do_local_io()
      ->store_create_and_write_obj()
          ->read_object()
              ->forward_read_obj_req()
                  ->get_sheep_fd()   <---384 fd
                  ->exec_req()
                      ->send_req() <--req 2
                      ->do_read() <--read response of req 1

Because both requests share the same FD, the second do_read() reads the
wrong response: it consumes the response of req 1 instead of req 2.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/ops.c |   91 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 78 insertions(+), 13 deletions(-)

diff --git a/sheep/ops.c b/sheep/ops.c
index e164cbc..4ebd4d6 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -556,23 +556,88 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
 static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
 				  uint64_t oid, char *buf)
 {
-	struct request read_req;
-	struct sd_req *hdr = &read_req.rq;
+	int i, j, nr_copies, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
+	struct sd_vnode *v;
+	char name[128];
+	int rounded_rand, local = -1;
+
+	nr_copies = get_nr_copies(vnodes);
+	oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
+
+	/* first try to read from local copy */
+	for (i = 0; i < nr_copies; i++) {
+		struct siocb iocb;
+
+		v = obj_vnodes[i];
+		addr_to_str(name, sizeof(name), v->addr, 0);
+
+		if (vnode_is_local(v)) {
+			memset(&iocb, 0, sizeof(iocb));
+			iocb.epoch = epoch;
+			iocb.buf = buf;
+			iocb.length = SD_DATA_OBJ_SIZE;
+			iocb.offset = 0;
+			ret = sd_store->read(oid, &iocb);
+			if (ret != SD_RES_SUCCESS) {
+				local = i;
+				break;
+			}
+			goto out;
+		}
+	}
+
+	/* then read random copy from cluster for better load balance */
+	rounded_rand = random() % nr_copies;
 
-	memset(&read_req, 0, sizeof(read_req));
-	hdr->opcode = SD_OP_READ_OBJ;
-	hdr->data_length = SD_DATA_OBJ_SIZE;
-	hdr->epoch = epoch;
+	for (i = 0; i < nr_copies; i++) {
+		unsigned wlen, rlen;
+		int fd;
+
+		j = (i + rounded_rand) % nr_copies;
+
+		/* bypass the local copy */
+		if (local == j)
+			continue;
+
+		v = obj_vnodes[j];
+		addr_to_str(name, sizeof(name), v->addr, 0);
+
+		fd = connect_to(name, v->port);
+		if (fd < 0)
+			continue;
 
-	hdr->obj.oid = oid;
-	hdr->obj.offset = 0;
-	hdr->obj.copies = get_nr_copies(vnodes);
+		rlen = SD_DATA_OBJ_SIZE;
+		wlen = 0;
 
-	read_req.data = buf;
-	read_req.op = get_sd_op(hdr->opcode);
-	read_req.vnodes = vnodes;
+		memset(&hdr, 0, sizeof(hdr));
+		hdr.opcode = SD_OP_READ_OBJ;
+		hdr.flags = SD_FLAG_CMD_IO_LOCAL;
+		hdr.epoch = epoch;
+		hdr.data_length = rlen;
 
-	return forward_read_obj_req(&read_req);
+		hdr.obj.oid = oid;
+		hdr.obj.offset = 0;
+
+		ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+
+		close(fd);
+
+		if (ret) {
+			dprintf("%x, %x\n", ret, rsp->result);
+			continue;
+		}
+
+		if (rsp->result == SD_RES_SUCCESS)
+			break;
+	}
+
+	ret = rsp->result;
+	dprintf("%"PRIx64" ret:%x\n", oid, ret);
+out:
+	return ret;
 }
 
 static int store_remove_obj(struct request *req)
-- 
1.7.10.2




More information about the sheepdog mailing list