From: levin li <xingke.lwp at taobao.com> Normal requests from QEMU carry the number of copies in their header; QEMU stores nr_copies the first time it reads the inode data. Many local requests, such as read_copy_from_replica and recover_object_from_replica, do not know the number of copies of each object, so this tree is necessary to keep track of the number of copies. Signed-off-by: levin li <xingke.lwp at taobao.com> --- include/sheepdog_proto.h | 5 ++ sheep/ops.c | 4 +- sheep/sheep_priv.h | 5 ++ sheep/vdi.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 1 deletions(-) diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index 7d96d63..2859844 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -277,4 +277,9 @@ static inline uint64_t vid_to_attr_oid(uint32_t vid, uint32_t attrid) return ((uint64_t)vid << VDI_SPACE_SHIFT) | VDI_ATTR_BIT | attrid; } +static inline uint32_t attr_oid_to_vid(uint64_t oid) +{ + return (~VDI_ATTR_BIT & oid) >> VDI_SPACE_SHIFT; +} + #endif diff --git a/sheep/ops.c b/sheep/ops.c index 8e8caa0..c9bd78b 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -134,8 +134,10 @@ static int post_cluster_new_vdi(const struct sd_req *req, struct sd_rsp *rsp, int ret = rsp->result; vprintf(SDOG_INFO, "done %d %ld\n", ret, nr); - if (ret == SD_RES_SUCCESS) + if (ret == SD_RES_SUCCESS) { set_bit(nr, sys->vdi_inuse); + add_vdi_copy_number(nr, rsp->vdi.copies); + } return ret; } diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 58a9648..b9d2721 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -200,6 +200,11 @@ int create_listen_port(int port, void *data); int init_store(const char *dir, int enable_write_cache); int init_base_path(const char *dir); +int get_vdi_copy_number(uint32_t vid); +int get_obj_copy_number(uint64_t oid); +int get_max_copy_number(void); +int get_req_copy_number(struct request *req); +int add_vdi_copy_number(uint32_t vid, int nr_copies); int
vdi_exist(uint32_t vid); int add_vdi(struct vdi_iocb *iocb, uint32_t *new_vid); diff --git a/sheep/vdi.c b/sheep/vdi.c index 3c8bef2..7931026 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -10,11 +10,135 @@ */ #include <stdio.h> #include <stdlib.h> +#include <pthread.h> #include <sys/time.h> #include "sheepdog_proto.h" #include "sheep_priv.h" +struct vdi_copy_entry { /* tree node mapping one VDI id to its number of copies */ + uint32_t vid; /* lookup key */ + unsigned int nr_copies; /* value stored for this vid */ + struct rb_node node; /* linkage into vdi_copy_root */ +}; + +uint32_t max_copies = 0; /* raised by add_vdi_copy_number(), read by get_max_copy_number(); NOTE(review): not static, yet this patch adds no extern declaration for it - confirm whether external linkage is intended */ +static struct rb_root vdi_copy_root = RB_ROOT; /* root of the vid -> nr_copies tree */ +pthread_rwlock_t vdi_copy_lock = PTHREAD_RWLOCK_INITIALIZER; /* taken for reading in get_vdi_copy_number() and for writing in add_vdi_copy_number(); NOTE(review): not static - confirm whether other files need it */ + +static struct vdi_copy_entry *vdi_copy_search(struct rb_root *root, + uint32_t vid) +{ /* walks the tree under 'root' comparing vid; returns the matching entry or NULL; takes no lock itself */ + struct rb_node *n = root->rb_node; + struct vdi_copy_entry *t; + + while (n) { + t = rb_entry(n, struct vdi_copy_entry, node); + + if (vid < t->vid) + n = n->rb_left; + else if (vid > t->vid) + n = n->rb_right; + else + return t; + } + + return NULL; /* no entry with this vid */ +} + +static struct vdi_copy_entry *vdi_copy_insert(struct rb_root *root, + struct vdi_copy_entry *new) +{ /* links 'new' into the tree ordered by vid; returns NULL on insertion, or the existing entry when the vid is already present; takes no lock itself */ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct vdi_copy_entry *entry; + + while (*p) { + parent = *p; + entry = rb_entry(parent, struct vdi_copy_entry, node); + + if (new->vid < entry->vid) + p = &(*p)->rb_left; + else if (new->vid > entry->vid) + p = &(*p)->rb_right; + else + return entry; /* an entry with this vid already exists; 'new' is not linked */ + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, root); + + return NULL; /* inserted successfully */ +} + +int get_vdi_copy_number(uint32_t vid) +{ /* returns the nr_copies recorded for vid, or 0 after logging an error when no entry exists */ + struct vdi_copy_entry *entry; + + pthread_rwlock_rdlock(&vdi_copy_lock); + entry = vdi_copy_search(&vdi_copy_root, vid); + pthread_rwlock_unlock(&vdi_copy_lock); + + if (!entry) { + eprintf("No VDI copy entry for %" PRIx32 " found\n", vid); /* NOTE(review): PRIx32 needs <inttypes.h>, which is not among the includes visible in this hunk - presumably pulled in by a project header; confirm */ + return 0; + } + + return entry->nr_copies; /* NOTE(review): read after the read lock was released, while add_vdi_copy_number() may overwrite nr_copies under the write lock - consider copying the value before unlocking */ +} + +int get_obj_copy_number(uint64_t oid) +{ /* derives the vid from oid and returns get_vdi_copy_number() for that vid */ + uint32_t vid; + if (is_vdi_attr_obj(oid)) + vid = attr_oid_to_vid(oid); + else + vid = oid_to_vid(oid); + + 
return get_vdi_copy_number(vid); +} + +int get_req_copy_number(struct request *req) +{ /* returns the copies value carried in the request; falls back to the lookup by object id when that value is 0 */ + int nr_copies; + + nr_copies = req->rq.obj.copies; + if (!nr_copies) + nr_copies = get_obj_copy_number(req->rq.obj.oid); + + return nr_copies; +} + +int get_max_copy_number(void) +{ /* returns the current value of max_copies */ + return uatomic_read(&max_copies); +} + +int add_vdi_copy_number(uint32_t vid, int nr_copies) +{ /* records nr_copies for vid (new entry, or overwrite of the existing one) and raises max_copies when needed; always returns SD_RES_SUCCESS */ + struct vdi_copy_entry *entry, *old; + + entry = xzalloc(sizeof(*entry)); /* allocated before the lock is taken; freed below if the vid already exists */ + entry->vid = vid; + entry->nr_copies = nr_copies; + + dprintf("%" PRIx32 ", %d\n", vid, nr_copies); + + pthread_rwlock_wrlock(&vdi_copy_lock); + old = vdi_copy_insert(&vdi_copy_root, entry); + if (old) { /* vid already present: drop the new node and update the existing one */ + free(entry); + entry = old; + entry->nr_copies = nr_copies; + } + + if (uatomic_read(&max_copies) == 0 || + nr_copies > uatomic_read(&max_copies)) /* NOTE(review): nr_copies is int while max_copies is uint32_t; presumably this is a mixed signed/unsigned comparison - confirm nr_copies is never negative */ + uatomic_set(&max_copies, nr_copies); + pthread_rwlock_unlock(&vdi_copy_lock); + + return SD_RES_SUCCESS; +} + int vdi_exist(uint32_t vid) { struct sheepdog_inode *inode; -- 1.7.1 |