[sheepdog] [PATCH] sheep: optimize read_copy_from_replica
Yibin Shen
zituan at taobao.com
Tue May 29 09:02:47 CEST 2012
Read the local base object first, then try a random copy in the cluster.
This patch effectively load-balances read traffic in a large-scale cluster
that has many cloned VMs sharing an identical base image.
Signed-off-by: Yibin Shen <zituan at taobao.com>
---
sheep/ops.c | 38 ++++++++++++++++++++++++++++----------
1 file changed, 28 insertions(+), 10 deletions(-)
diff --git a/sheep/ops.c b/sheep/ops.c
index b63955c..56d3daa 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -641,20 +641,20 @@ static int local_trace_cat_ops(const struct sd_req *req, struct sd_rsp *rsp, voi
static int read_copy_from_replica(struct request *req, uint32_t epoch,
uint64_t oid, char *buf)
{
- int i, nr_copies, ret;
+ int i, j, nr_copies, ret;
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
+ struct sd_vnode *v;
+ char name[128];
+ int rounded_rand, local = -1;
nr_copies = get_nr_copies(req->vnodes);
oid_to_vnodes(req->vnodes, oid, nr_copies, obj_vnodes);
+ /* first try to read from local copy */
for (i = 0; i < nr_copies; i++) {
- struct sd_vnode *v;
struct siocb iocb;
- char name[128];
- unsigned wlen, rlen;
- int fd;
v = obj_vnodes[i];
addr_to_str(name, sizeof(name), v->addr, 0);
@@ -666,10 +666,28 @@ static int read_copy_from_replica(struct request *req, uint32_t epoch,
iocb.length = SD_DATA_OBJ_SIZE;
iocb.offset = 0;
ret = sd_store->read(oid, &iocb);
- if (ret != SD_RES_SUCCESS)
- continue;
+ if (ret != SD_RES_SUCCESS) {
+ local = i; //failed to read local copy, mark it
+ break;
+ }
goto out;
}
+ }
+
+ /* then read random copy from cluster */
+ rounded_rand = random() % nr_copies;
+
+ for (i = 0; i < nr_copies; i++) {
+ unsigned wlen, rlen;
+ int fd;
+
+ j = (i + rounded_rand) % nr_copies;
+
+ /* bypass the local copy */
+ if (local == j) continue;
+
+ v = obj_vnodes[j];
+ addr_to_str(name, sizeof(name), v->addr, 0);
fd = connect_to(name, v->port);
if (fd < 0)
@@ -691,10 +709,10 @@ static int read_copy_from_replica(struct request *req, uint32_t epoch,
close(fd);
- dprintf("%x, %x\n", ret, rsp->result);
- if (ret)
+ if (ret) {
+ dprintf("%x, %x\n", ret, rsp->result);
continue;
-
+ }
switch (rsp->result) {
case SD_RES_SUCCESS:
ret = SD_RES_SUCCESS;
--
1.7.10
More information about the sheepdog
mailing list