[sheepdog] [PATCH] sheep: refactor read/write_object()
Liu Yuan
namei.unix at gmail.com
Tue May 29 12:56:30 CEST 2012
From: Liu Yuan <tailai.ly at taobao.com>
It would be nice if all reads and writes went through forward_read/write_obj_req().
This also cleans up the code a bit. But this patch goes further than a refactor:
- for now only vdi operations call read/write_object(), which simply use
connect_to(), whereas forward_read/write_obj_req() do a timeout read/write.
- I'm planning to use these functions for object cache writethrough too.
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
sheep/gateway.c | 2 +-
sheep/sheep_priv.h | 1 +
sheep/store.c | 238 +++++++++++++---------------------------------------
3 files changed, 58 insertions(+), 183 deletions(-)
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 21ee6a1..5672952 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -76,7 +76,7 @@ static int object_cache_handle_request(struct request *req)
return object_cache_rw(cache, idx, req);
}
-static int forward_read_obj_req(struct request *req)
+int forward_read_obj_req(struct request *req)
{
int i, fd, ret = SD_RES_SUCCESS;
unsigned wlen, rlen;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 946c2b3..983783d 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -258,6 +258,7 @@ void queue_cluster_request(struct request *req);
void do_io_request(struct work *work);
void do_gateway_request(struct work *work);
int forward_write_obj_req(struct request *req);
+int forward_read_obj_req(struct request *req);
int read_epoch(uint32_t *epoch, uint64_t *ctime,
struct sd_node *entries, int *nr_entries);
diff --git a/sheep/store.c b/sheep/store.c
index 00f8064..ff3d59a 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -533,40 +533,6 @@ int read_epoch(uint32_t *epoch, uint64_t *ct,
return SD_RES_SUCCESS;
}
-static int write_object_local(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset, uint16_t flags, int copies,
- uint32_t epoch, int create)
-{
- int ret;
- struct request *req;
- struct sd_req *hdr;
-
- req = zalloc(sizeof(*req));
- if (!req)
- return SD_RES_NO_MEM;
- hdr = &req->rq;
-
- if (create)
- hdr->opcode = SD_OP_CREATE_AND_WRITE_OBJ;
- else
- hdr->opcode = SD_OP_WRITE_OBJ;
- hdr->flags = flags | SD_FLAG_CMD_WRITE;
- hdr->data_length = datalen;
-
- hdr->obj.oid = oid;
- hdr->obj.offset = offset;
- hdr->obj.copies = copies;
-
- req->data = data;
- req->op = get_sd_op(hdr->opcode);
-
- ret = do_local_io(req, epoch);
-
- free(req);
-
- return ret;
-}
-
static int write_inode_cache(uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, uint16_t flags, int copies,
uint32_t epoch, int create)
@@ -605,101 +571,43 @@ static int write_inode_cache(uint64_t oid, char *data, unsigned int datalen,
return ret;
}
-int write_object(struct vnode_info *vnodes, uint32_t node_version,
+/*
+ * Write data to both local object cache (if enabled) and backends
+ */
+int write_object(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, uint16_t flags, int nr_copies, int create)
{
- struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
- int i, fd, ret;
+ struct request write_req;
+ struct sd_req *hdr = &write_req.rq;
+ int ret;
if (sys->enable_write_cache && object_is_cached(oid)) {
ret = write_inode_cache(oid, data, datalen, offset,
- flags, nr_copies, node_version, create);
+ flags, nr_copies, epoch, create);
if (ret != 0) {
- eprintf("fail %"PRIx64" %"PRIx32"\n", oid, ret);
- return -1;
- }
- }
-
- oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
- for (i = 0; i < nr_copies; i++) {
- struct sd_req hdr;
- unsigned rlen = 0, wlen = datalen;
- struct sd_vnode *v;
- char name[128];
-
- v = obj_vnodes[i];
- if (vnode_is_local(v)) {
- ret = write_object_local(oid, data, datalen, offset,
- flags, nr_copies, node_version,
- create);
-
- if (ret != 0) {
- eprintf("fail %"PRIx64" %"PRIx32"\n", oid, ret);
- return -1;
- }
-
- continue;
- }
-
- addr_to_str(name, sizeof(name), v->addr, 0);
-
- fd = connect_to(name, v->port);
- if (fd < 0) {
- eprintf("failed to connect to host %s\n", name);
- return -1;
- }
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.epoch = node_version;
- if (create)
- hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
- else
- hdr.opcode = SD_OP_WRITE_OBJ;
-
- hdr.flags = flags;
- hdr.flags |= SD_FLAG_CMD_WRITE | SD_FLAG_CMD_IO_LOCAL;
- hdr.data_length = wlen;
-
- hdr.obj.oid = oid;
- hdr.obj.offset = offset;
- hdr.obj.copies = nr_copies;
-
- ret = exec_req(fd, &hdr, data, &wlen, &rlen);
- close(fd);
- if (ret) {
- eprintf("failed to update host %s\n", name);
- return -1;
+ eprintf("write cache failed %"PRIx64" %"PRIx32"\n",
+ oid, ret);
+ return ret;
}
}
- return 0;
-}
-
-static int read_object_local(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset, int copies, uint32_t epoch)
-{
- int ret;
- struct request *req;
-
- req = zalloc(sizeof(*req));
- if (!req)
- return SD_RES_NO_MEM;
-
- req->rq.opcode = SD_OP_READ_OBJ;
- req->rq.flags = 0;
- req->rq.data_length = datalen;
-
- req->rq.obj.oid = oid;
- req->rq.obj.copies = copies;
- req->rq.obj.offset = offset;
+ hdr->opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
+ hdr->flags = SD_FLAG_CMD_WRITE;
+ hdr->data_length = datalen;
+ hdr->epoch = epoch;
- req->data = data;
- req->op = get_sd_op(req->rq.opcode);
+ hdr->obj.oid = oid;
+ hdr->obj.offset = offset;
+ hdr->obj.copies = nr_copies;
- ret = do_local_io(req, epoch);
+ write_req.data = data;
+ write_req.op = get_sd_op(hdr->opcode);
+ write_req.vnodes = vnodes;
- free(req);
+ ret = forward_write_obj_req(&write_req);
+ if (ret != SD_RES_SUCCESS)
+ eprintf("failed to forward write object %x\n", ret);
return ret;
}
@@ -737,85 +645,51 @@ static int read_object_cache(uint64_t oid, char *data, unsigned int datalen,
return ret;
}
-
-int read_object(struct vnode_info *vnodes, uint32_t node_version,
+/*
+ * Read data from local object cache first; if that fails, try the backends
+ *
+ * This is only sensible if objects of the same oid in the cache and backends
+ * are consistent
+ */
+int read_object(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, char *data, unsigned int datalen,
uint64_t offset, int nr_copies)
{
- struct sd_vnode *v;
- struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
- char name[128];
- int i = 0, fd, ret, last_error = SD_RES_SUCCESS;
+ struct request read_req;
+ struct sd_req *hdr = &read_req.rq;
+ int ret;
if (sys->enable_write_cache && object_is_cached(oid)) {
ret = read_object_cache(oid, data, datalen, offset,
- nr_copies, node_version);
- if (ret != SD_RES_SUCCESS)
- eprintf("fail %"PRIx64" %"PRIx32"\n", oid, ret);
-
- return ret;
- }
-
- /* search a local object first */
- oid_to_vnodes(vnodes, oid, nr_copies, obj_vnodes);
- for (i = 0; i < nr_copies; i++) {
- v = obj_vnodes[i];
- if (vnode_is_local(v)) {
- ret = read_object_local(oid, data, datalen, offset,
- nr_copies, node_version);
-
- if (ret != SD_RES_SUCCESS) {
- eprintf("fail %"PRIx64" %"PRId32"\n", oid, ret);
- return ret;
- }
-
- return SD_RES_SUCCESS;
+ nr_copies, epoch);
+ if (ret != SD_RES_SUCCESS) {
+ eprintf("try forward read %"PRIx64" %"PRIx32"\n",
+ oid, ret);
+ goto forward_read;
}
-
+ return ret;
}
+forward_read:
+ hdr->opcode = SD_OP_READ_OBJ;
+ hdr->data_length = datalen;
+ hdr->epoch = epoch;
- for (i = 0; i < nr_copies; i++) {
- struct sd_req hdr;
- struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
- unsigned wlen = 0, rlen = datalen;
-
- v = obj_vnodes[i];
- addr_to_str(name, sizeof(name), v->addr, 0);
-
- fd = connect_to(name, v->port);
- if (fd < 0) {
- printf("%s(%d): %s, %m\n", __func__, __LINE__,
- name);
- return SD_RES_EIO;
- }
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.epoch = node_version;
- hdr.opcode = SD_OP_READ_OBJ;
- hdr.flags = SD_FLAG_CMD_IO_LOCAL;
- hdr.data_length = rlen;
-
- hdr.obj.oid = oid;
- hdr.obj.offset = offset;
-
- ret = exec_req(fd, &hdr, data, &wlen, &rlen);
- close(fd);
-
- if (ret) {
- last_error = SD_RES_EIO;
- continue;
- }
+ hdr->obj.oid = oid;
+ hdr->obj.offset = offset;
+ hdr->obj.copies = nr_copies;
- if (rsp->result == SD_RES_SUCCESS)
- return SD_RES_SUCCESS;
+ read_req.data = data;
+ read_req.op = get_sd_op(hdr->opcode);
+ read_req.vnodes = vnodes;
- last_error = rsp->result;
- }
+ ret = forward_read_obj_req(&read_req);
+ if (ret != SD_RES_SUCCESS)
+ eprintf("failed to forward read object %x\n", ret);
- return last_error;
+ return ret;
}
-int remove_object(struct vnode_info *vnodes, uint32_t node_version,
+int remove_object(struct vnode_info *vnodes, uint32_t epoch,
uint64_t oid, int nr)
{
struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
@@ -840,7 +714,7 @@ int remove_object(struct vnode_info *vnodes, uint32_t node_version,
}
memset(&hdr, 0, sizeof(hdr));
- hdr.epoch = node_version;
+ hdr.epoch = epoch;
hdr.opcode = SD_OP_REMOVE_OBJ;
hdr.flags = 0;
hdr.data_length = rlen;
--
1.7.10.2
More information about the sheepdog
mailing list