[sheepdog] [PATCH v4 07/10] sheep: use the specified copies number for IO requests
levin li
levin108 at gmail.com
Thu Aug 9 07:27:42 CEST 2012
From: levin li <xingke.lwp at taobao.com>
In gateway_{read,write,create_and_write}_obj and {read,write}_object,
we should use the copies number specific to each request or object,
instead of using the global copies number sys->nr_copies or the one
calculated from the vnodes.
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
sheep/farm/trunk.c | 2 +-
sheep/gateway.c | 4 +-
sheep/ops.c | 2 +-
sheep/recovery.c | 4 +-
sheep/request.c | 2 +-
sheep/sheep_priv.h | 6 ++--
sheep/store.c | 9 +++++--
sheep/vdi.c | 56 +++++++++++++++++++++++++++++++++------------------
8 files changed, 52 insertions(+), 33 deletions(-)
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index cd1fd20..1bfb6b3 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -245,7 +245,7 @@ static int oid_stale(uint64_t oid)
struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
vinfo = get_vnode_info();
- nr_copies = get_nr_copies(vinfo);
+ nr_copies = get_obj_copy_number(oid);
oid_to_vnodes(vinfo->vnodes, vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/gateway.c b/sheep/gateway.c
index bdbd08c..41d712b 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -35,7 +35,7 @@ int gateway_read_obj(struct request *req)
if (sys->enable_write_cache && !req->local && !bypass_object_cache(req))
return object_cache_handle_request(req);
- nr_copies = get_nr_copies(req->vinfo);
+ nr_copies = get_req_copy_number(req);
oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
for (i = 0; i < nr_copies; i++) {
@@ -245,7 +245,7 @@ static int gateway_forward_request(struct request *req)
write_info_init(&wi);
wlen = hdr.data_length;
- nr_copies = get_nr_copies(req->vinfo);
+ nr_copies = get_req_copy_number(req);
oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/ops.c b/sheep/ops.c
index ce0f8a4..faa50b5 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -664,7 +664,7 @@ static int read_copy_from_replica(struct vnode_info *vnodes, uint32_t epoch,
char name[128];
int rounded_rand, local = -1;
- nr_copies = get_nr_copies(vnodes);
+ nr_copies = get_obj_copy_number(oid);
oid_to_vnodes(vnodes->vnodes, vnodes->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 3fdcad2..060730b 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -172,7 +172,7 @@ again:
oid, tgt_epoch);
/* Let's do a breadth-first search */
- nr_copies = get_nr_copies(old);
+ nr_copies = get_obj_copy_number(oid);
for (i = 0; i < nr_copies; i++) {
struct sd_vnode *tgt_vnode = oid_to_vnode(old->vnodes,
old->nr_vnodes,
@@ -515,8 +515,8 @@ static void screen_object_list(struct recovery_work *rw,
int nr_objs;
int i, j;
- nr_objs = get_nr_copies(rw->cur_vinfo);
for (i = 0; i < nr_oids; i++) {
+ nr_objs = get_obj_copy_number(oids[i]);
oid_to_vnodes(rw->cur_vinfo->vnodes, rw->cur_vinfo->nr_vnodes,
oids[i], nr_objs, vnodes);
for (j = 0; j < nr_objs; j++) {
diff --git a/sheep/request.c b/sheep/request.c
index ab7c63a..ce4315b 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -30,7 +30,7 @@ static int is_access_local(struct request *req, uint64_t oid)
int nr_copies;
int i;
- nr_copies = get_nr_copies(req->vinfo);
+ nr_copies = get_req_copy_number(req);
oid_to_vnodes(req->vinfo->vnodes, req->vinfo->nr_vnodes, oid,
nr_copies, obj_vnodes);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 335e337..ec2b4dc 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -282,10 +282,10 @@ int is_recovery_init(void);
int node_in_recovery(void);
int write_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset, uint16_t flags, int create);
+ uint64_t offset, uint16_t flags, int create, int nr_copies);
int read_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset);
-int remove_object(uint64_t oid);
+ uint64_t offset, int nr_copies);
+int remove_object(uint64_t oid, int nr_copies);
int exec_local_req(struct sd_req *rq, void *data);
void local_req_init(void);
diff --git a/sheep/store.c b/sheep/store.c
index bebb2c0..4839d13 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -548,7 +548,7 @@ int init_store(const char *d, int enable_write_cache)
* Write data to both local object cache (if enabled) and backends
*/
int write_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset, uint16_t flags, int create)
+ uint64_t offset, uint16_t flags, int create, int nr_copies)
{
struct sd_req hdr;
int ret;
@@ -576,6 +576,7 @@ forward_write:
hdr.obj.oid = oid;
hdr.obj.offset = offset;
+ hdr.obj.copies = nr_copies;
ret = exec_local_req(&hdr, data);
if (ret != SD_RES_SUCCESS)
@@ -589,7 +590,7 @@ forward_write:
* try read backends
*/
int read_object(uint64_t oid, char *data, unsigned int datalen,
- uint64_t offset)
+ uint64_t offset, int nr_copies)
{
struct sd_req hdr;
int ret;
@@ -609,6 +610,7 @@ forward_read:
hdr.data_length = datalen;
hdr.obj.oid = oid;
hdr.obj.offset = offset;
+ hdr.obj.copies = nr_copies;
ret = exec_local_req(&hdr, data);
if (ret != SD_RES_SUCCESS)
@@ -617,13 +619,14 @@ forward_read:
return ret;
}
-int remove_object(uint64_t oid)
+int remove_object(uint64_t oid, int copies)
{
struct sd_req hdr;
int ret;
sd_init_req(&hdr, SD_OP_REMOVE_OBJ);
hdr.obj.oid = oid;
+ hdr.obj.copies = copies;
ret = exec_local_req(&hdr, NULL);
if (ret != SD_RES_SUCCESS)
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 72fbd7b..c39074f 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -300,6 +300,7 @@ int vdi_exist(uint32_t vid)
{
struct sheepdog_inode *inode;
int ret = 1;
+ int nr_copies;
inode = zalloc(sizeof(*inode));
if (!inode) {
@@ -307,8 +308,10 @@ int vdi_exist(uint32_t vid)
goto out;
}
+ nr_copies = get_vdi_copy_number(vid);
+
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
- sizeof(*inode), 0);
+ sizeof(*inode), 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
eprintf("fail to read vdi inode (%" PRIx32 ")\n", vid);
ret = 0;
@@ -359,7 +362,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
if (iocb->base_vid) {
ret = read_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
- sizeof(*base), 0);
+ sizeof(*base), 0, iocb->nr_copies);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_BASE_VDI_READ;
goto out;
@@ -374,7 +377,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
name, cur_vid, iocb->base_vid);
ret = read_object(vid_to_vdi_oid(cur_vid), (char *)cur,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, iocb->nr_copies);
if (ret != SD_RES_SUCCESS) {
vprintf(SDOG_ERR, "failed\n");
ret = SD_RES_BASE_VDI_READ;
@@ -416,7 +419,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
if (iocb->is_snapshot && cur_vid != iocb->base_vid) {
ret = write_object(vid_to_vdi_oid(cur_vid), (char *)cur,
- SD_INODE_HEADER_SIZE, 0, 0, 0);
+ SD_INODE_HEADER_SIZE, 0, 0, 0,
+ iocb->nr_copies);
if (ret != 0) {
vprintf(SDOG_ERR, "failed\n");
ret = SD_RES_BASE_VDI_READ;
@@ -426,7 +430,8 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
if (iocb->base_vid) {
ret = write_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
- SD_INODE_HEADER_SIZE, 0, 0, 0);
+ SD_INODE_HEADER_SIZE, 0, 0, 0,
+ iocb->nr_copies);
if (ret != 0) {
vprintf(SDOG_ERR, "failed\n");
ret = SD_RES_BASE_VDI_WRITE;
@@ -435,7 +440,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
}
ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new),
- 0, 0, 1);
+ 0, 0, 1, iocb->nr_copies);
if (ret != 0)
ret = SD_RES_VDI_WRITE;
@@ -455,6 +460,7 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name,
unsigned long i;
int ret = SD_RES_NO_MEM;
int vdi_found = 0;
+ int nr_copies;
inode = malloc(SD_INODE_HEADER_SIZE);
if (!inode) {
@@ -463,8 +469,9 @@ static int find_first_vdi(unsigned long start, unsigned long end, char *name,
}
for (i = start; i >= end; i--) {
+ nr_copies = get_vdi_copy_number(i);
ret = read_object(vid_to_vdi_oid(i), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_EIO;
goto out_free_inode;
@@ -662,6 +669,7 @@ struct deletion_work {
struct request *req;
uint32_t vid;
+ int nr_copies;
int count;
uint32_t *buf;
@@ -681,7 +689,7 @@ static int delete_inode(struct deletion_work *dw)
}
ret = read_object(vid_to_vdi_oid(dw->vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, dw->nr_copies);
if (ret != SD_RES_SUCCESS) {
ret = SD_RES_EIO;
goto out;
@@ -690,7 +698,7 @@ static int delete_inode(struct deletion_work *dw)
memset(inode->name, 0, sizeof(inode->name));
ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0, 0, 0);
+ SD_INODE_HEADER_SIZE, 0, 0, 0, dw->nr_copies);
if (ret != 0) {
ret = SD_RES_EIO;
goto out;
@@ -723,6 +731,7 @@ static void delete_one(struct work *work)
uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1);
int ret, i, nr_deleted;
struct sheepdog_inode *inode = NULL;
+ int nr_copies;
eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id);
@@ -732,8 +741,9 @@ static void delete_one(struct work *work)
goto out;
}
+ nr_copies = get_vdi_copy_number(vdi_id);
ret = read_object(vid_to_vdi_oid(vdi_id),
- (void *)inode, sizeof(*inode), 0);
+ (void *)inode, sizeof(*inode), 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
eprintf("cannot find VDI object\n");
@@ -757,7 +767,7 @@ static void delete_one(struct work *work)
continue;
}
- ret = remove_object(oid);
+ ret = remove_object(oid, nr_copies);
if (ret != SD_RES_SUCCESS)
eprintf("remove object %" PRIx64 " fail, %d\n", oid, ret);
@@ -774,7 +784,7 @@ static void delete_one(struct work *work)
memset(inode->name, 0, sizeof(inode->name));
write_object(vid_to_vdi_oid(vdi_id), (void *)inode,
- sizeof(*inode), 0, 0, 0);
+ sizeof(*inode), 0, 0, 0, nr_copies);
out:
free(inode);
}
@@ -811,6 +821,7 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
struct sheepdog_inode *inode = NULL;
int done = dw->count;
uint32_t vid;
+ int nr_copies;
inode = malloc(SD_INODE_HEADER_SIZE);
if (!inode) {
@@ -821,8 +832,9 @@ static int fill_vdi_list(struct deletion_work *dw, uint32_t root_vid)
dw->buf[dw->count++] = root_vid;
again:
vid = dw->buf[done++];
+ nr_copies = get_vdi_copy_number(vid);
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, nr_copies);
if (ret != SD_RES_SUCCESS) {
eprintf("cannot find VDI object\n");
@@ -851,7 +863,7 @@ out:
static uint64_t get_vdi_root(uint32_t vid, int *cloned)
{
- int ret;
+ int ret, nr_copies;
struct sheepdog_inode *inode = NULL;
*cloned = 0;
@@ -863,8 +875,9 @@ static uint64_t get_vdi_root(uint32_t vid, int *cloned)
goto out;
}
next:
+ nr_copies = get_vdi_copy_number(vid);
ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
- SD_INODE_HEADER_SIZE, 0);
+ SD_INODE_HEADER_SIZE, 0, nr_copies);
if (vid == inode->vdi_id && inode->snap_id == 1
&& inode->parent_vdi_id != 0
@@ -910,6 +923,7 @@ static int start_deletion(struct request *req, uint32_t vid)
dw->count = 0;
dw->vid = vid;
dw->req = req;
+ dw->nr_copies = get_vdi_copy_number(vid);
dw->work.fn = delete_one;
dw->work.done = delete_one_done;
@@ -965,7 +979,7 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
struct sheepdog_vdi_attr tmp_attr;
uint64_t oid, hval;
uint32_t end;
- int ret;
+ int ret, nr_copies;
vattr->ctime = create_time;
@@ -979,12 +993,13 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
end = *attrid - 1;
while (*attrid != end) {
oid = vid_to_attr_oid(vid, *attrid);
+ nr_copies = get_vdi_copy_number(vid);
ret = read_object(oid, (char *)&tmp_attr,
- sizeof(tmp_attr), 0);
+ sizeof(tmp_attr), 0, nr_copies);
if (ret == SD_RES_NO_OBJ && wr) {
ret = write_object(oid, (char *)vattr,
- data_len, 0, 0, 1);
+ data_len, 0, 0, 1, nr_copies);
if (ret)
ret = SD_RES_EIO;
else
@@ -1005,14 +1020,15 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
else if (delete) {
ret = write_object(oid, (char *)"", 1,
offsetof(struct sheepdog_vdi_attr, name),
- 0, 0);
+ 0, 0, nr_copies);
if (ret)
ret = SD_RES_EIO;
else
ret = SD_RES_SUCCESS;
} else if (wr) {
ret = write_object(oid, (char *)vattr,
- SD_ATTR_OBJ_SIZE, 0, 0, 0);
+ SD_ATTR_OBJ_SIZE, 0, 0, 0,
+ nr_copies);
if (ret)
ret = SD_RES_EIO;
--
1.7.1
More information about the sheepdog
mailing list