[sheepdog] [PATCH 1/3] sheep: fix a bug where read_object() fails in recovery
levin li
levin108 at gmail.com
Sun Jun 24 15:24:54 CEST 2012
From: levin li <xingke.lwp at taobao.com>
read_object() calls forward_read_obj_req() to get the object, but
that call may fail with the result SD_RES_OLD_NODE_VER, and
read_object() does nothing to handle this error. This patch fixes the
problem by sending a gateway request to the local node, so that the
gateway retries when the error occurs.
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
sheep/store.c | 152 ++++++++++++++++++++++++++++++++-------------------------
1 file changed, 86 insertions(+), 66 deletions(-)
diff --git a/sheep/store.c b/sheep/store.c
index 52c4716..cfd980b 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -516,9 +516,11 @@ int write_object(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, uint16_t flags, int nr_copies, int create)
{
- struct request write_req;
- struct sd_req *hdr = &write_req.rq;
- int ret;
+ struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+ char host[128];
+ unsigned int wlen = datalen, rlen = 0;
+ int fd, ret;
if (sys->enable_write_cache && object_is_cached(oid)) {
ret = object_cache_write(oid, data, datalen, offset,
@@ -530,23 +532,35 @@ int write_object(struct vnode_info *vnodes, uint32_t epoch,
}
}
- memset(&write_req, 0, sizeof(write_req));
- hdr->opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
- hdr->flags = SD_FLAG_CMD_WRITE;
- hdr->data_length = datalen;
- hdr->epoch = epoch;
+ addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
+ fd = connect_to(host, sys->this_node.port);
+ if (fd < 0) {
+ dprintf("Failed to connect to local node\n");
+ return SD_RES_NETWORK_ERROR;
+ }
- hdr->obj.oid = oid;
- hdr->obj.offset = offset;
- hdr->obj.copies = nr_copies;
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE;
+ hdr.data_length = datalen;
- write_req.data = data;
- write_req.op = get_sd_op(hdr->opcode);
- write_req.vnodes = vnodes;
+ hdr.obj.oid = oid;
+ hdr.obj.offset = offset;
+
+ ret = exec_req(fd, &hdr, data, &wlen, &rlen);
+ close(fd);
+
+ if (ret) {
+ dprintf("Failed to write object %" PRIx64 "\n", oid);
+ return SD_RES_NETWORK_ERROR;
+ }
+
+ if (rsp->result != SD_RES_SUCCESS) {
+ dprintf("Failed to write object %" PRIx64 " %s\n", oid,
+ sd_strerror(rsp->result));
+ return rsp->result;
+ }
- ret = forward_write_obj_req(&write_req);
- if (ret != SD_RES_SUCCESS)
- eprintf("failed to forward write object %x\n", ret);
return ret;
}
@@ -558,9 +572,11 @@ int read_object(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, int nr_copies)
{
- struct request read_req;
- struct sd_req *hdr = &read_req.rq;
- int ret;
+ struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+ char host[128];
+ unsigned int wlen = 0, rlen = datalen;
+ int fd, ret;
if (sys->enable_write_cache && object_is_cached(oid)) {
ret = object_cache_read(oid, data, datalen, offset,
@@ -572,23 +588,35 @@ int read_object(struct vnode_info *vnodes, uint32_t epoch,
}
return ret;
}
- memset(&read_req, 0, sizeof(read_req));
+
forward_read:
- hdr->opcode = SD_OP_READ_OBJ;
- hdr->data_length = datalen;
- hdr->epoch = epoch;
+ addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
+ fd = connect_to(host, sys->this_node.port);
+ if (fd < 0) {
+ dprintf("Failed to connect to local node\n");
+ return SD_RES_NETWORK_ERROR;
+ }
- hdr->obj.oid = oid;
- hdr->obj.offset = offset;
- hdr->obj.copies = nr_copies;
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_READ_OBJ;
+ hdr.data_length = datalen;
- read_req.data = data;
- read_req.op = get_sd_op(hdr->opcode);
- read_req.vnodes = vnodes;
+ hdr.obj.oid = oid;
+ hdr.obj.offset = offset;
- ret = forward_read_obj_req(&read_req);
- if (ret != SD_RES_SUCCESS)
- eprintf("failed to forward read object %x\n", ret);
+ ret = exec_req(fd, &hdr, data, &wlen, &rlen);
+ close(fd);
+
+ if (ret) {
+ dprintf("Failed to read object %" PRIx64 "\n", oid);
+ return SD_RES_NETWORK_ERROR;
+ }
+
+ if (rsp->result != SD_RES_SUCCESS) {
+ dprintf("Failed to read object %" PRIx64 " %s\n", oid,
+ sd_strerror(rsp->result));
+ return rsp->result;
+ }
return ret;
}
@@ -596,47 +624,39 @@ forward_read:
int remove_object(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, int nr)
{
- struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
- int err = 0, i = 0;
-
- oid_to_vnodes(vnodes, oid, nr, obj_vnodes);
- for (i = 0; i < nr; i++) {
- struct sd_req hdr;
- struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
- struct sd_vnode *v;
- unsigned wlen = 0, rlen = 0;
- char name[128];
- int fd, ret;
-
- v = obj_vnodes[i];
- addr_to_str(name, sizeof(name), v->addr, 0);
+ struct sd_req hdr;
+ struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+ char host[128];
+ unsigned int wlen = 0, rlen = 0;
+ int fd, ret;
- fd = connect_to(name, v->port);
- if (fd < 0) {
- rsp->result = SD_RES_NETWORK_ERROR;
- return -1;
- }
+ addr_to_str(host, sizeof(host), sys->this_node.addr, 0);
- memset(&hdr, 0, sizeof(hdr));
- hdr.epoch = epoch;
- hdr.opcode = SD_OP_REMOVE_OBJ;
- hdr.flags = 0;
- hdr.data_length = rlen;
+ fd = connect_to(host, sys->this_node.port);
+ if (fd < 0) {
+ rsp->result = SD_RES_NETWORK_ERROR;
+ return -1;
+ }
- hdr.obj.oid = oid;
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_REMOVE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE;
- ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
- close(fd);
+ hdr.obj.oid = oid;
- if (ret)
- return -1;
+ ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+ close(fd);
- if (rsp->result != SD_RES_SUCCESS)
- err = 1;
+ if (ret) {
+ dprintf("Failed to remove object %" PRIx64 "\n", oid);
+ return SD_RES_NETWORK_ERROR;
}
- if (err)
- return -1;
+ if (rsp->result != SD_RES_SUCCESS) {
+ dprintf("Failed to remove object %" PRIx64 " %s\n", oid,
+ sd_strerror(rsp->result));
+ return rsp->result;
+ }
return 0;
}
--
1.7.10
More information about the sheepdog
mailing list