[sheepdog] [PATCH 4/6] sheep: use the specified copies number for IO requests
levin li
levin108 at gmail.com
Mon Aug 6 07:59:57 CEST 2012
From: levin li <xingke.lwp at taobao.com>
In gateway_{read,write,create_and_write}_obj and {read,write}_object,
we should use the copies number that belongs to each individual request
or object, instead of using the global copies number sys->nr_copies or
the one calculated from vnodes.
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
sheep/farm/trunk.c | 2 +-
sheep/gateway.c | 6 ++--
sheep/ops.c | 2 +-
sheep/recovery.c | 4 +-
sheep/request.c | 2 +-
sheep/sheep_priv.h | 6 ++--
sheep/store.c | 9 +++++--
sheep/vdi.c | 56 +++++++++++++++++++++++++++++++++------------------
8 files changed, 53 insertions(+), 34 deletions(-)
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index cd1fd20..a818636 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -245,7 +245,7 @@ static int oid_stale(uint64_t oid)
struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
vinfo = get_vnode_info();
- nr_copies = get_nr_copies(vinfo);
+ nr_copies = get_obj_copies(oid);
oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/gateway.c b/sheep/gateway.c
index bdbd08c..4c16755 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -35,7 +35,7 @@ int gateway_read_obj(struct request *req)
if (sys->enable_write_cache && !req->local && !bypass_object_cache(req))
return object_cache_handle_request(req);
- nr_copies = get_nr_copies(req->vinfo);
+ nr_copies = get_req_copies(req);
oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
for (i = 0; i < nr_copies; i++) {
@@ -233,7 +233,7 @@ static int gateway_forward_request(struct request *req)
struct sd_vnode *v;
struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
uint64_t oid = req->rq.obj.oid;
- int nr_copies;
+ int nr_copies = req->rq.obj.copies;
struct write_info wi;
struct sd_op_template *op;
struct sd_req hdr;
@@ -245,7 +245,7 @@ static int gateway_forward_request(struct request *req)
write_info_init(&wi);
wlen = hdr.data_length;
- nr_copies = get_nr_copies(req->vinfo);
+ nr_copies = get_req_copies(req);
oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/ops.c b/sheep/ops.c
index efaf979..4037dd5 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -654,7 +654,7 @@ static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
char name[128];
int rounded_rand, local = -1;
- nr_copies = get_nr_copies(vnodes);
+ nr_copies = get_obj_copies(oid);
oid_to_vnodes(vnodes->vnodes, vnodes->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 5164aa7..6919b64 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -172,7 +172,7 @@ again:
oid, tgt_epoch);
/* Let's do a breadth-first search */
- nr_copies = get_nr_copies(old);
+ nr_copies = get_obj_copies(nr_copies);
for (i = 0; i < nr_copies; i++) {
struct sd_vnode *tgt_vnode = oid_to_vnode(old->vnodes,
old->nr_vnodes,
@@ -515,8 +515,8 @@ static void screen_object_list(struct recovery_work *rw,
int nr_objs;
int i, j;
- nr_objs = get_nr_copies(rw->cur_vinfo);
for (i = 0; i < nr_oids; i++) {
+ nr_objs = get_obj_copies(oids[i]);
oid_to_vnodes(rw->cur_vinfo->vnodes, rw->cur_vinfo->nr_vnodes,
oids[i], nr_objs, vnodes);
for (j = 0; j < nr_objs; j++) {
diff --git a/sheep/request.c b/sheep/request.c
index ab7c63a..6b5c957 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -30,7 +30,7 @@ static int is_access_local(struct request *req, uint64_t oid)
int nr_copies;
int i;
- nr_copies = get_nr_copies(req->vinfo);
+ nr_copies = get_req_copies(req);
oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index b6830ae..891fe87 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -277,10 +277,10 @@ int is_recovery_init(void);
int node_in_recovery(void);
int write_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset, uint16_t flags, int create);
+ uint64_t offset, uint16_t flags, int create, int nr_copies);
int read_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset);
-int remove_object(uint64_t oid);
+ uint64_t offset, int nr_copies);
+int remove_object(uint64_t oid, int nr_copies);
int exec_local_req(struct sd_req *rq, void *data);
void local_req_init(void);
diff --git a/sheep/store.c b/sheep/store.c
index b093916..2afbef4 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -527,7 +527,7 @@ int init_store(const char *d, int enable_write_cache)
* Write data to both local object cache (if enabled) and backends
*/
int write_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset, uint16_t flags, int create)
+ uint64_t offset, uint16_t flags, int create, int nr_copies)
{
struct sd_req hdr;
int ret;
@@ -555,6 +555,7 @@ forward_write:
hdr.obj.oid = oid;
hdr.obj.offset = offset;
+ hdr.obj.copies = nr_copies;
ret = exec_local_req(&hdr, data);
if (ret != SD_RES_SUCCESS)
@@ -568,7 +569,7 @@ forward_write:
* try read backends
*/
int read_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset)
+ uint64_t offset, int nr_copies)
{
struct sd_req hdr;
int ret;
@@ -588,6 +589,7 @@ forward_read:
hdr.data_length = datalen;
hdr.obj.oid = oid;
hdr.obj.offset = offset;
+ hdr.obj.copies = nr_copies;
ret = exec_local_req(&hdr, data);
if (ret != SD_RES_SUCCESS)
@@ -596,13 +598,14 @@ forward_read:
return ret;
}
-int remove_object(uint64_t oid)
+int remove_object(uint64_t oid, int copies)
{
struct sd_req hdr;
int ret;
sd_init_req(&hdr, SD_OP_REMOVE_OBJ);
hdr.obj.oid = oid;
+ hdr.obj.copies = copies;
ret = exec_local_req(&hdr, NULL);
if (ret != SD_RES_SUCCESS)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 28611b8..11dcc2d 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -19,6 +19,7 @@ int vdi_exist(uint32_t vid)
{
struct sheepdog_inode *inode;
int ret = 1;
+ int nr_copies;
inode = zalloc(sizeof(*inode));
if (!inode) {
@@ -26,8 +27,10 @@ int vdi_exist(uint32_t vid)
goto out;
}
+ nr_copies = get_vdi_copies(vid);
+
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
- sizeof(*inode), 0);
+ sizeof(*inode), 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid);
ret = 0;
@@ -78,7 +81,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
if (iocb->base_vid) {
ret = read_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
- sizeof(*base), 0);
+ sizeof(*base), 0, iocb->nr_copies);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_BASE_VDI_READ;
goto out;
@@ -93,7 +96,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
name, cur_vid, iocb->base_vid);
ret = read_object(vid_to_vdi_oid(cur_vid), (char *)cur,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, iocb->nr_copies);
if (ret != SD_RES_SUCCESS) {
vprintf(SDOG_ERR, "failed\n");
ret = SD_RES_BASE_VDI_READ;
@@ -135,7 +138,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
if (iocb->is_snapshot && cur_vid != iocb->base_vid) {
ret = write_object(vid_to_vdi_oid(cur_vid), (char *)cur,
- SD_INODE_HEADER_SIZE, 0, 0, 0);
+ SD_INODE_HEADER_SIZE, 0, 0, 0,
+ iocb->nr_copies);
if (ret != 0) {
vprintf(SDOG_ERR, "failed\n");
ret = SD_RES_BASE_VDI_READ;
@@ -145,7 +149,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
if (iocb->base_vid) {
ret = write_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
- SD_INODE_HEADER_SIZE, 0, 0, 0);
+ SD_INODE_HEADER_SIZE, 0, 0, 0,
+ iocb->nr_copies);
if (ret != 0) {
vprintf(SDOG_ERR, "failed\n");
ret = SD_RES_BASE_VDI_WRITE;
@@ -154,7 +159,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
}
ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new),
- 0, 0, 1);
+ 0, 0, 1, iocb->nr_copies);
if (ret != 0)
ret = SD_RES_VDI_WRITE;
@@ -174,6 +179,7 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name,
unsigned long i;
int ret = SD_RES_NO_MEM;
int vdi_found = 0;
+ int nr_copies;
inode = malloc(SD_INODE_HEADER_SIZE);
if (!inode) {
@@ -182,8 +188,9 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name,
}
for (i = start; i >= end; i--) {
+ nr_copies = get_vdi_copies(i);
ret = read_object(vid_to_vdi_oid(i), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_EIO;
goto out_free_inode;
@@ -380,6 +387,7 @@ struct deletion_work {
struct request *req;
uint32_t vid;
+ int nr_copies;
int count;
uint32_t *buf;
@@ -399,7 +407,7 @@ static int delete_inode(struct deletion_work *dw)
}
ret = read_object(vid_to_vdi_oid(dw->vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, dw->nr_copies);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_EIO;
goto out;
@@ -408,7 +416,7 @@ static int delete_inode(struct deletion_work *dw)
memset(inode->name, 0, sizeof(inode->name));
ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0, 0, 0);
+ SD_INODE_HEADER_SIZE, 0, 0, 0, dw->nr_copies);
if (ret != 0) {
ret = SD_RES_EIO;
goto out;
@@ -441,6 +449,7 @@ static void delete_one(struct work *work)
uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
int ret, i, nr_deleted;
struct sheepdog_inode *inode = NULL;
+ int nr_copies;
eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
@@ -450,8 +459,9 @@ static void delete_one(struct work *work)
goto out;
}
+ nr_copies = get_vdi_copies(vdi_id);
ret = read_object(vid_to_vdi_oid(vdi_id),
- (void *)inode, sizeof(*inode), 0);
+ (void *)inode, sizeof(*inode), 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
eprintf("cannot find VDI object\n");
@@ -475,7 +485,7 @@ static void delete_one(struct work *work)
continue;
}
- ret = remove_object(oid);
+ ret = remove_object(oid, nr_copies);
if (ret != SD_RES_SUCCESS)
eprintf("remove object %" PRIx64 " fail, %d\n", oid, ret);
@@ -492,7 +502,7 @@ static void delete_one(struct work *work)
memset(inode->name, 0, sizeof(inode->name));
write_object(vid_to_vdi_oid(vdi_id), (void *)inode,
- sizeof(*inode), 0, 0, 0);
+ sizeof(*inode), 0, 0, 0, nr_copies);
out:
free(inode);
}
@@ -529,6 +539,7 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
struct sheepdog_inode *inode = NULL;
int done = dw->count;
uint32_t vid;
+ int nr_copies;
inode = malloc(SD_INODE_HEADER_SIZE);
if (!inode) {
@@ -539,8 +550,9 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
dw->buf[dw->count++] = root_vid;
again:
vid = dw->buf[done++];
+ nr_copies = get_vdi_copies(vid);
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
eprintf("cannot find VDI object\n");
@@ -569,7 +581,7 @@ out:
static uint64_t get_vdi_root(uint32_t vid, int *cloned)
{
- int ret;
+ int ret, nr_copies;
struct sheepdog_inode *inode = NULL;
*cloned = 0;
@@ -581,8 +593,9 @@ static uint64_t get_vdi_root(uint32_t vid, int *cloned)
goto out;
}
next:
+ nr_copies = get_vdi_copies(vid);
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, nr_copies);
if (vid == inode->vdi_id && inode->snap_id == 1
&& inode->parent_vdi_id != 0
@@ -628,6 +641,7 @@ static int start_deletion(struct request *req, uint32_t vid)
dw->count = 0;
dw->vid = vid;
dw->req = req;
+ dw->nr_copies = get_vdi_copies(vid);
dw->work.fn = delete_one;
dw->work.done = delete_one_done;
@@ -683,7 +697,7 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
struct sheepdog_vdi_attr tmp_attr;
uint64_t oid, hval;
uint32_t end;
- int ret;
+ int ret, nr_copies;
vattr->ctime = ctime;
@@ -697,12 +711,13 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
end = *attrid - 1;
while (*attrid != end) {
oid = vid_to_attr_oid(vid, *attrid);
+ nr_copies = get_vdi_copies(vid);
ret = read_object(oid, (char *)&tmp_attr,
- sizeof(tmp_attr), 0);
+ sizeof(tmp_attr), 0, nr_copies);
if (ret == SD_RES_NO_OBJ && wr) {
ret = write_object(oid, (char *)vattr,
- data_len, 0, 0, 1);
+ data_len, 0, 0, 1, nr_copies);
if (ret)
ret = SD_RES_EIO;
else
@@ -723,14 +738,15 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
else if (delete) {
ret = write_object(oid, (char *)"", 1,
offsetof(struct sheepdog_vdi_attr, name),
- 0, 0);
+ 0, 0, nr_copies);
if (ret)
ret = SD_RES_EIO;
else
ret = SD_RES_SUCCESS;
} else if (wr) {
ret = write_object(oid, (char *)vattr,
- SD_ATTR_OBJ_SIZE, 0, 0, 0);
+ SD_ATTR_OBJ_SIZE, 0, 0, 0,
+ nr_copies);
if (ret)
ret = SD_RES_EIO;
--
1.7.1
More information about the sheepdog
mailing list