[sheepdog] [PATCH 5/5] sheep: fix nested requests for the same FD
Liu Yuan
namei.unix at gmail.com
Tue Jun 19 05:55:18 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
Consider the following flow:
forward_write_obj_req
->get_sheep_fd() <---384 fd
->send_req() <--req 1
->do_local_io()
->store_create_and_write_obj()
->read_object()
->forward_read_obj_req()
->get_sheep_fd() <---384 fd
->exec_req()
->send_req() <--req 2
->do_read() <--read response of req 1
This lets the do_read() of the second request read the response of the first request, i.e. the wrong response.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/ops.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 78 insertions(+), 13 deletions(-)
diff --git a/sheep/ops.c b/sheep/ops.c
index e164cbc..4ebd4d6 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -556,23 +556,88 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, char *buf)
{
- struct request read_req;
- struct sd_req *hdr = &read_req.rq;
+ int i, j, nr_copies, ret;
+ struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr; /* response view aliasing the storage of hdr */
+ struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
+ struct sd_vnode *v;
+ char name[128];
+ int rounded_rand, local = -1; /* local: index of the local copy whose read failed, -1 if none */
+
+ nr_copies = get_nr_copies(vnodes);
+ oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
+
+ /* first try to read from local copy */
+ for (i = 0; i < nr_copies; i++) {
+ struct siocb iocb;
+
+ v = obj_vnodes[i];
+ addr_to_str(name, sizeof(name), v->addr, 0); /* NOTE(review): name is not read inside this loop */
+
+ if (vnode_is_local(v)) {
+ memset(&iocb, 0, sizeof(iocb));
+ iocb.epoch = epoch;
+ iocb.buf = buf;
+ iocb.length = SD_DATA_OBJ_SIZE;
+ iocb.offset = 0;
+ ret = sd_store->read(oid, &iocb);
+ if (ret != SD_RES_SUCCESS) { /* local read failed: remember its index and fall back to the other copies */
+ local = i;
+ break;
+ }
+ goto out; /* local read succeeded: return SD_RES_SUCCESS without any network I/O */
+ }
+ }
+
+ /* then read random copy from cluster for better load balance */
+ rounded_rand = random() % nr_copies; /* NOTE(review): division by zero if nr_copies is 0 -- confirm get_nr_copies() is always > 0 */
- memset(&read_req, 0, sizeof(read_req));
- hdr->opcode = SD_OP_READ_OBJ;
- hdr->data_length = SD_DATA_OBJ_SIZE;
- hdr->epoch = epoch;
+ for (i = 0; i < nr_copies; i++) {
+ unsigned wlen, rlen;
+ int fd;
+
+ j = (i + rounded_rand) % nr_copies; /* visit every copy once, wrapping around from the random start */
+
+ /* bypass the local copy */
+ if (local == j)
+ continue;
+
+ v = obj_vnodes[j];
+ addr_to_str(name, sizeof(name), v->addr, 0);
+
+ fd = connect_to(name, v->port); /* dedicated connection for this attempt; closed right after exec_req() below */
+ if (fd < 0)
+ continue;
- hdr->obj.oid = oid;
- hdr->obj.offset = 0;
- hdr->obj.copies = get_nr_copies(vnodes);
+ rlen = SD_DATA_OBJ_SIZE;
+ wlen = 0; /* presumably: no payload is sent with a read request -- verify against exec_req() */
- read_req.data = buf;
- read_req.op = get_sd_op(hdr->opcode);
- read_req.vnodes = vnodes;
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_READ_OBJ;
+ hdr.flags = SD_FLAG_CMD_IO_LOCAL;
+ hdr.epoch = epoch;
+ hdr.data_length = rlen;
- return forward_read_obj_req(&read_req);
+ hdr.obj.oid = oid;
+ hdr.obj.offset = 0;
+
+ ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+
+ close(fd);
+
+ if (ret) { /* exec_req() returned non-zero: log it and try the next copy */
+ dprintf("%x, %x\n", ret, rsp->result);
+ continue;
+ }
+
+ if (rsp->result == SD_RES_SUCCESS)
+ break;
+ }
+
+ ret = rsp->result; /* NOTE(review): hdr is uninitialized here if every connect_to() failed, and may not hold a response if the last exec_req() failed -- confirm */
+ dprintf("%"PRIx64" ret:%x\n", oid, ret);
+out:
+ return ret;
}
static int store_remove_obj(struct request *req)
--
1.7.10.2
More information about the sheepdog
mailing list