Add a new get_nr_copies helper to consolidate calculating the number of copies of an object that we need to deal with. As a side effect this allows making struct vnode_info private to group.c again. There are a few places that take a different number of copies from the inode or the on-the-wire header. These look incorrect to me, but I'd like to have some review of those. These places should be either documented or removed ASAP. Signed-off-by: Christoph Hellwig --- sheep/farm/trunk.c | 12 +++----- sheep/group.c | 19 +++++++++++++ sheep/object_cache.c | 11 ++----- sheep/sdnet.c | 39 ++++++++++---------------- sheep/sheep_priv.h | 9 +----- sheep/store.c | 41 +++++++++------------------- sheep/vdi.c | 74 +++++++++++++++++++++++++++++++-------------------- 7 files changed, 104 insertions(+), 101 deletions(-) Index: sheepdog/sheep/store.c =================================================================== --- sheepdog.orig/sheep/store.c 2012-04-27 15:33:37.000000000 +0200 +++ sheepdog/sheep/store.c 2012-04-27 15:33:47.916055550 +0200 @@ -223,7 +223,7 @@ int get_obj_list(const struct sd_list_re static int read_copy_from_replica(struct request *req, uint32_t epoch, uint64_t oid, char *buf) { - int i, nr, ret; + int i, nr_copies, ret; unsigned wlen, rlen; char name[128]; struct sd_vnode *v; @@ -232,11 +232,8 @@ static int read_copy_from_replica(struct struct siocb iocb; int fd; - nr = sys->nr_sobjs; - if (nr > req->vnodes->nr_zones) - nr = req->vnodes->nr_zones; - - for (i = 0; i < nr; i++) { + nr_copies = get_nr_copies(req->vnodes); + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(req->vnodes, oid, i); addr_to_str(name, sizeof(name), v->addr, 0); @@ -305,20 +302,17 @@ static int forward_read_obj_req(struct r struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; struct sd_vnode *v; uint64_t oid = hdr.oid; - int copies; - - copies = hdr.copies; - - /* temporary hack */ - if (!copies) - copies = sys->nr_sobjs; - if (copies > req->vnodes->nr_zones) - copies = 
req->vnodes->nr_zones; + int nr_copies; hdr.flags |= SD_FLAG_CMD_IO_LOCAL; + if (hdr.copies) + nr_copies = hdr.copies; + else + nr_copies = get_nr_copies(req->vnodes); + /* TODO: we can do better; we need to check this first */ - for (i = 0; i < copies; i++) { + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(req->vnodes, oid, i); if (vnode_is_local(v)) { ret = do_local_io(req, hdr.epoch); @@ -329,7 +323,7 @@ static int forward_read_obj_req(struct r } read_remote: - for (i = 0; i < copies; i++) { + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(req->vnodes, oid, i); if (vnode_is_local(v)) continue; @@ -367,20 +361,12 @@ int forward_write_obj_req(struct request struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&req->rp; struct sd_vnode *v; uint64_t oid = hdr.oid; - int copies; + int nr_copies; struct pollfd pfds[SD_MAX_REDUNDANCY]; int nr_fds, local = 0; dprintf("%"PRIx64"\n", oid); - copies = hdr.copies; - - /* temporary hack */ - if (!copies) - copies = sys->nr_sobjs; - if (copies > req->vnodes->nr_zones) - copies = req->vnodes->nr_zones; - nr_fds = 0; memset(pfds, 0, sizeof(pfds)); for (i = 0; i < ARRAY_SIZE(pfds); i++) @@ -390,7 +376,8 @@ int forward_write_obj_req(struct request wlen = hdr.data_length; - for (i = 0; i < copies; i++) { + nr_copies = get_nr_copies(req->vnodes); + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(req->vnodes, oid, i); addr_to_str(name, sizeof(name), v->addr, 0); Index: sheepdog/sheep/farm/trunk.c =================================================================== --- sheepdog.orig/sheep/farm/trunk.c 2012-04-27 15:33:37.000000000 +0200 +++ sheepdog/sheep/farm/trunk.c 2012-04-27 15:33:47.916055550 +0200 @@ -235,16 +235,14 @@ static unsigned char *omap_tree_insert(u static int oid_stale(uint64_t oid) { - int i, copies; - struct vnode_info *vnodes = get_vnode_info(); + int i, nr_copies; + struct vnode_info *vnodes; struct sd_vnode *v; int ret = 1; - copies = sys->nr_sobjs; - if (copies > vnodes->nr_zones) - copies = 
vnodes->nr_zones; - - for (i = 0; i < copies; i++) { + vnodes = get_vnode_info(); + nr_copies = get_nr_copies(vnodes); + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(vnodes, oid, i); if (vnode_is_local(v)) { ret = 0; Index: sheepdog/sheep/group.c =================================================================== --- sheepdog.orig/sheep/group.c 2012-04-27 15:32:59.000000000 +0200 +++ sheepdog/sheep/group.c 2012-04-27 15:33:47.916055550 +0200 @@ -32,6 +32,13 @@ struct node { struct list_head list; }; +struct vnode_info { + struct sd_vnode entries[SD_MAX_VNODES]; + int nr_vnodes; + int nr_zones; + int refcnt; +}; + struct join_message { uint8_t proto_ver; uint8_t nr_sobjs; @@ -170,6 +177,18 @@ static int update_vnode_info(void) return 0; } +/* + * Number of copies of an object we have to deal with: the desired redundancy + * (sys->nr_sobjs), capped at nr_zones when fewer zones are available. + */ +int get_nr_copies(struct vnode_info *vnode_info) +{ + int nr_copies = vnode_info->nr_zones; + if (nr_copies > sys->nr_sobjs) + nr_copies = sys->nr_sobjs; + return nr_copies; +} + static void do_cluster_op(void *arg) { struct vdi_op_message *msg = arg; Index: sheepdog/sheep/object_cache.c =================================================================== --- sheepdog.orig/sheep/object_cache.c 2012-04-27 15:33:37.000000000 +0200 +++ sheepdog/sheep/object_cache.c 2012-04-27 15:33:47.916055550 +0200 @@ -377,7 +377,7 @@ int object_cache_pull(struct object_cach struct vnode_info *vnodes = get_vnode_info(); struct sd_vnode *v; void *buf; - int copies; + int nr_copies; if (idx & CACHE_VDI_BIT) { oid = vid_to_vdi_oid(oc->vid); @@ -393,12 +393,9 @@ int object_cache_pull(struct object_cach goto out; } - copies = sys->nr_sobjs; - if (vnodes->nr_zones < copies) - copies = vnodes->nr_zones; - /* Check if we can read locally */ - for (i = 0; i < copies; i++) { + nr_copies = get_nr_copies(vnodes); + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(vnodes, oid, i); if (vnode_is_local(v)) { struct siocb iocb = { 0 }; @@ 
-423,7 +420,7 @@ int object_cache_pull(struct object_cach pull_remote: /* Okay, no luck, let's read remotely */ - for (i = 0; i < copies; i++) { + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(vnodes, oid, i); if (vnode_is_local(v)) Index: sheepdog/sheep/sdnet.c =================================================================== --- sheepdog.orig/sheep/sdnet.c 2012-04-27 15:33:37.792055290 +0200 +++ sheepdog/sheep/sdnet.c 2012-04-27 15:33:47.920055548 +0200 @@ -38,20 +38,19 @@ void resume_pending_requests(void) static int is_access_local(struct request *req, uint64_t oid, int copies) { - struct vnode_info *vnodes = req->vnodes; struct sd_vnode *v; + int nr_copies; int i; if (oid == 0) return 0; - if (copies) - copies = sys->nr_sobjs; - if (copies > vnodes->nr_zones) - copies = vnodes->nr_zones; + nr_copies = get_nr_copies(req->vnodes); + if (copies < nr_copies) + nr_copies = copies; - for (i = 0; i < copies; i++) { - v = oid_to_vnode(vnodes, oid, i); + for (i = 0; i < nr_copies; i++) { + v = oid_to_vnode(req->vnodes, oid, i); if (vnode_is_local(v)) return 1; } @@ -677,23 +676,21 @@ int create_listen_port(int port, void *d int write_object(struct vnode_info *vnodes, uint32_t node_version, uint64_t oid, char *data, unsigned int datalen, - uint64_t offset, uint16_t flags, int nr, int create) + uint64_t offset, uint16_t flags, int nr_copies, int create) { struct sd_obj_req hdr; struct sd_vnode *v; int i, fd, ret; char name[128]; - if (nr > vnodes->nr_zones) - nr = vnodes->nr_zones; - - for (i = 0; i < nr; i++) { + for (i = 0; i < nr_copies; i++) { unsigned rlen = 0, wlen = datalen; v = oid_to_vnode(vnodes, oid, i); if (vnode_is_local(v)) { ret = write_object_local(oid, data, datalen, offset, - flags, nr, node_version, create); + flags, nr_copies, node_version, + create); if (ret != 0) { eprintf("fail %"PRIx64" %"PRIx32"\n", oid, ret); @@ -719,7 +716,7 @@ int write_object(struct vnode_info *vnod hdr.opcode = SD_OP_WRITE_OBJ; hdr.oid = oid; - hdr.copies = nr; + hdr.copies = nr_copies; hdr.flags = flags; 
hdr.flags |= SD_FLAG_CMD_WRITE | SD_FLAG_CMD_IO_LOCAL; @@ -739,7 +736,7 @@ int write_object(struct vnode_info *vnod int read_object(struct vnode_info *vnodes, uint32_t node_version, uint64_t oid, char *data, unsigned int datalen, - uint64_t offset, int nr) + uint64_t offset, int nr_copies) { struct sd_obj_req hdr; struct sd_obj_rsp *rsp = (struct sd_obj_rsp *)&hdr; @@ -747,15 +744,12 @@ int read_object(struct vnode_info *vnode char name[128]; int i = 0, fd, ret, last_error = SD_RES_SUCCESS; - if (nr > vnodes->nr_zones) - nr = vnodes->nr_zones; - /* search a local object first */ - for (i = 0; i < nr; i++) { + for (i = 0; i < nr_copies; i++) { v = oid_to_vnode(vnodes, oid, i); if (vnode_is_local(v)) { - ret = read_object_local(oid, data, datalen, offset, nr, - node_version); + ret = read_object_local(oid, data, datalen, offset, + nr_copies, node_version); if (ret != SD_RES_SUCCESS) { eprintf("fail %"PRIx64" %"PRId32"\n", oid, ret); @@ -767,7 +761,7 @@ int read_object(struct vnode_info *vnode } - for (i = 0; i < nr; i++) { + for (i = 0; i < nr_copies; i++) { unsigned wlen = 0, rlen = datalen; v = oid_to_vnode(vnodes, oid, i); @@ -816,9 +810,6 @@ int remove_object(struct vnode_info *vno struct sd_vnode *v; int i = 0, fd, ret, err = 0; - if (nr > vnodes->nr_zones) - nr = vnodes->nr_zones; - for (i = 0; i < nr; i++) { unsigned wlen = 0, rlen = 0; Index: sheepdog/sheep/sheep_priv.h =================================================================== --- sheepdog.orig/sheep/sheep_priv.h 2012-04-27 15:33:37.000000000 +0200 +++ sheepdog/sheep/sheep_priv.h 2012-04-27 15:33:47.920055548 +0200 @@ -62,14 +62,8 @@ struct client_info { int refcnt; }; -struct vnode_info { - struct sd_vnode entries[SD_MAX_VNODES]; - int nr_vnodes; - int nr_zones; - int refcnt; -}; - struct request; +struct vnode_info; typedef void (*req_end_t) (struct request *); @@ -245,6 +239,7 @@ void put_vnode_info(struct vnode_info *v struct sd_vnode *oid_to_vnode(struct vnode_info *vnode_info, uint64_t oid, 
int copy_idx); +int get_nr_copies(struct vnode_info *vnode_info); int is_access_to_busy_objects(uint64_t oid); Index: sheepdog/sheep/vdi.c =================================================================== --- sheepdog.orig/sheep/vdi.c 2012-04-27 15:32:33.000000000 +0200 +++ sheepdog/sheep/vdi.c 2012-04-27 15:38:25.124062648 +0200 @@ -27,6 +27,7 @@ static int create_vdi_obj(uint32_t epoch struct timeval tv; int ret = SD_RES_NO_MEM; unsigned long block_size = SD_DATA_OBJ_SIZE; + int nr_copies; new = zalloc(sizeof(*new)); if (!new) { @@ -52,10 +53,14 @@ static int create_vdi_obj(uint32_t epoch vnode_info = get_vnode_info(); + nr_copies = get_nr_copies(vnode_info); + if (nr_copies > copies) + nr_copies = copies; + if (base_vid) { ret = read_object(vnode_info, epoch, vid_to_vdi_oid(base_vid), (char *)base, - sizeof(*base), 0, copies); + sizeof(*base), 0, nr_copies); if (ret != SD_RES_SUCCESS) { ret = SD_RES_BASE_VDI_READ; goto out_put_vnode_info; @@ -71,7 +76,7 @@ static int create_vdi_obj(uint32_t epoch ret = read_object(vnode_info, epoch, vid_to_vdi_oid(cur_vid), (char *)cur, - SD_INODE_HEADER_SIZE, 0, copies); + SD_INODE_HEADER_SIZE, 0, nr_copies); if (ret != SD_RES_SUCCESS) { vprintf(SDOG_ERR, "failed\n"); ret = SD_RES_BASE_VDI_READ; @@ -114,7 +119,7 @@ static int create_vdi_obj(uint32_t epoch if (is_snapshot && cur_vid != base_vid) { ret = write_object(vnode_info, epoch, vid_to_vdi_oid(cur_vid), (char *)cur, - SD_INODE_HEADER_SIZE, 0, 0, copies, 0); + SD_INODE_HEADER_SIZE, 0, 0, nr_copies, 0); if (ret != 0) { vprintf(SDOG_ERR, "failed\n"); ret = SD_RES_BASE_VDI_READ; @@ -125,7 +130,7 @@ static int create_vdi_obj(uint32_t epoch if (base_vid) { ret = write_object(vnode_info, epoch, vid_to_vdi_oid(base_vid), (char *)base, - SD_INODE_HEADER_SIZE, 0, 0, copies, 0); + SD_INODE_HEADER_SIZE, 0, 0, nr_copies, 0); if (ret != 0) { vprintf(SDOG_ERR, "failed\n"); ret = SD_RES_BASE_VDI_WRITE; @@ -135,7 +140,7 @@ static int create_vdi_obj(uint32_t epoch ret = 
write_object(vnode_info, epoch, vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new), - 0, 0, copies, 1); + 0, 0, nr_copies, 1); if (ret != 0) ret = SD_RES_VDI_WRITE; @@ -151,12 +156,12 @@ out: static int find_first_vdi(uint32_t epoch, unsigned long start, unsigned long end, char *name, char *tag, uint32_t snapid, uint32_t *vid, unsigned long *deleted_nr, uint32_t *next_snap, - unsigned int *nr_copies, uint64_t *ctime) + unsigned int *inode_nr_copies, uint64_t *ctime) { struct vnode_info *vnode_info; struct sheepdog_inode *inode = NULL; unsigned long i; - int nr_reqs; + int nr_copies; int ret = SD_RES_NO_MEM; int vdi_found = 0; @@ -167,15 +172,12 @@ static int find_first_vdi(uint32_t epoch } vnode_info = get_vnode_info(); - - nr_reqs = sys->nr_sobjs; - if (nr_reqs > vnode_info->nr_zones) - nr_reqs = vnode_info->nr_zones; + nr_copies = get_nr_copies(vnode_info); for (i = start; i >= end; i--) { ret = read_object(vnode_info, epoch, vid_to_vdi_oid(i), (char *)inode, - SD_INODE_HEADER_SIZE, 0, nr_reqs); + SD_INODE_HEADER_SIZE, 0, nr_copies); if (ret != SD_RES_SUCCESS) { ret = SD_RES_EIO; goto out_put_vnode_info; @@ -196,7 +198,7 @@ static int find_first_vdi(uint32_t epoch *next_snap = inode->snap_id + 1; *vid = inode->vdi_id; - *nr_copies = inode->nr_copies; + *inode_nr_copies = inode->nr_copies; if (ctime) *ctime = inode->ctime; ret = SD_RES_SUCCESS; @@ -400,8 +402,9 @@ static LIST_HEAD(deletion_work_list); static int delete_inode(struct deletion_work *dw) { - int nr_reqs, ret = SD_RES_SUCCESS; struct sheepdog_inode *inode = NULL; + int ret = SD_RES_SUCCESS; + int nr_copies; inode = zalloc(sizeof(*inode)); if (!inode) { @@ -409,12 +412,10 @@ static int delete_inode(struct deletion_ goto out; } - nr_reqs = sys->nr_sobjs; - if (nr_reqs > dw->vnodes->nr_zones) - nr_reqs = dw->vnodes->nr_zones; + nr_copies = get_nr_copies(dw->vnodes); ret = read_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(dw->vid), - (char *)inode, SD_INODE_HEADER_SIZE, 0, nr_reqs); + (char *)inode, 
SD_INODE_HEADER_SIZE, 0, nr_copies); if (ret != SD_RES_SUCCESS) { ret = SD_RES_EIO; goto out; @@ -427,7 +428,7 @@ static int delete_inode(struct deletion_ ret = write_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(dw->vid), (char *)inode, SD_INODE_HEADER_SIZE, 0, 0, - nr_reqs, 0); + nr_copies, 0); if (ret != 0) { ret = SD_RES_EIO; goto out; @@ -445,6 +446,7 @@ static void delete_one(struct work *work uint32_t vdi_id = *(dw->buf + dw->count - dw->done - 1); int ret, i; struct sheepdog_inode *inode = NULL; + int nr_copies; eprintf("%d %d, %16x\n", dw->done, dw->count, vdi_id); @@ -454,9 +456,13 @@ static void delete_one(struct work *work goto out; } + nr_copies = get_nr_copies(dw->vnodes); + if (nr_copies > inode->nr_copies) + nr_copies = inode->nr_copies; + ret = read_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(vdi_id), (void *)inode, sizeof(*inode), - 0, sys->nr_sobjs); + 0, nr_copies); if (ret != SD_RES_SUCCESS) { eprintf("cannot find VDI object\n"); @@ -475,7 +481,7 @@ static void delete_one(struct work *work ret = remove_object(dw->vnodes, dw->epoch, vid_to_data_oid(inode->data_vdi_id[i], i), - inode->nr_copies); + nr_copies); if (ret != SD_RES_SUCCESS) dw->delete_error = 1; @@ -485,8 +491,7 @@ static void delete_one(struct work *work if (dw->delete_error) { write_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(vdi_id), - (void *)inode, sizeof(*inode), 0, 0, - inode->nr_copies, 0); + (void *)inode, sizeof(*inode), 0, 0, nr_copies, 0); } out: @@ -525,6 +530,9 @@ static int fill_vdi_list(struct deletion struct sheepdog_inode *inode = NULL; int done = dw->count; uint32_t vid; + int nr_copies; + + nr_copies = get_nr_copies(dw->vnodes); inode = malloc(SD_INODE_HEADER_SIZE); if (!inode) { @@ -537,7 +545,7 @@ again: vid = dw->buf[done++]; ret = read_object(dw->vnodes, dw->epoch, vid_to_vdi_oid(vid), (char *)inode, SD_INODE_HEADER_SIZE, 0, - sys->nr_sobjs); + nr_copies); if (ret != SD_RES_SUCCESS) { eprintf("cannot find VDI object\n"); @@ -569,6 +577,7 @@ static uint64_t 
get_vdi_root(struct vnod { int ret; struct sheepdog_inode *inode = NULL; + int nr_copies = get_nr_copies(vnode_info); inode = malloc(SD_INODE_HEADER_SIZE); if (!inode) { @@ -578,7 +587,7 @@ static uint64_t get_vdi_root(struct vnod } next: ret = read_object(vnode_info, epoch, vid_to_vdi_oid(vid), (char *)inode, - SD_INODE_HEADER_SIZE, 0, sys->nr_sobjs); + SD_INODE_HEADER_SIZE, 0, nr_copies); if (vid == inode->vdi_id && inode->snap_id == 1 && inode->parent_vdi_id != 0 @@ -677,12 +686,17 @@ int get_vdi_attr(uint32_t epoch, struct struct sheepdog_vdi_attr tmp_attr; uint64_t oid, hval; uint32_t end; + int nr_copies; int ret; vattr->ctime = ctime; vnode_info = get_vnode_info(); + nr_copies = get_nr_copies(vnode_info); + if (nr_copies > copies) + nr_copies = copies; + /* we cannot include value_len for calculating the hash value */ hval = fnv_64a_buf(vattr->name, sizeof(vattr->name), FNV1A_64_INIT); hval = fnv_64a_buf(vattr->tag, sizeof(vattr->tag), hval); @@ -694,11 +708,12 @@ int get_vdi_attr(uint32_t epoch, struct while (*attrid != end) { oid = vid_to_attr_oid(vid, *attrid); ret = read_object(vnode_info, epoch, oid, (char *)&tmp_attr, - sizeof(tmp_attr), 0, copies); + sizeof(tmp_attr), 0, nr_copies); if (ret == SD_RES_NO_OBJ && wr) { ret = write_object(vnode_info, epoch, oid, - (char *)vattr, data_len, 0, 0, copies, 1); + (char *)vattr, data_len, 0, 0, + nr_copies, 1); if (ret) ret = SD_RES_EIO; else @@ -720,7 +735,7 @@ int get_vdi_attr(uint32_t epoch, struct ret = write_object(vnode_info, epoch, oid, (char *)"", 1, offsetof(struct sheepdog_vdi_attr, name), - 0, copies, 0); + 0, nr_copies, 0); if (ret) ret = SD_RES_EIO; else @@ -728,7 +743,8 @@ int get_vdi_attr(uint32_t epoch, struct } else if (wr) { ret = write_object(vnode_info, epoch, oid, (char *)vattr, - SD_ATTR_OBJ_SIZE, 0, 0, copies, 0); + SD_ATTR_OBJ_SIZE, 0, 0, + nr_copies, 0); if (ret) ret = SD_RES_EIO;