[sheepdog] [PATCH v4 5/6] sheep: cache vnode_info when doing recovery
Liu Yuan
namei.unix at gmail.com
Tue May 20 11:33:36 CEST 2014
On Tue, May 20, 2014 at 04:40:33PM +0800, Robin Dong wrote:
> From: Robin Dong <sanbai at taobao.com>
>
> When sheepdog doing recovery in same low-performance machines, the CPU is
> very high. After using perf tools to check the hot point of performance in
> sheep daemon, we find out that the "alloc_vnode_info()" function cost lots
> of CPU circyles because the rollback_vnode_info() rebuilds the vnode_info
> by calling alloc_vnode_info() too frequently.
>
> The solution is to cache result of alloc_vnode_info() for specific 'epoch'
> and 'nr_nodes' in the recovery context.
>
> Signed-off-by: Robin Dong <sanbai at taobao.com>
> ---
> sheep/group.c | 12 ++++++++++++
> sheep/recovery.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
> sheep/sheep_priv.h | 4 ++++
> 3 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/sheep/group.c b/sheep/group.c
> index 63e9ab9..360771d 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -175,6 +175,18 @@ struct vnode_info *get_vnode_info_epoch(uint32_t epoch,
> return alloc_vnode_info(&nroot);
> }
>
> +int get_nodes_epoch(uint32_t epoch, struct vnode_info *cur_vinfo,
> + struct sd_node *nodes, int len)
> +{
> + int nr_nodes;
> +
> + nr_nodes = epoch_log_read(epoch, nodes, len);
> + if (nr_nodes < 0)
> + nr_nodes = epoch_log_read_remote(epoch, nodes, len,
> + NULL, cur_vinfo);
> + return nr_nodes;
> +}
> +
> int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index 27eb6c8..75e4b93 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -17,6 +17,7 @@ struct recovery_work {
> uint32_t epoch;
> uint32_t tgt_epoch;
>
> + struct recovery_info *rinfo;
> struct vnode_info *old_vinfo;
> struct vnode_info *cur_vinfo;
>
> @@ -71,6 +72,10 @@ struct recovery_info {
>
> struct vnode_info *old_vinfo;
> struct vnode_info *cur_vinfo;
> +
> + int max_epoch;
> + struct vnode_info **vinfo_array;
> + struct sd_mutex vinfo_lock;
> };
>
> static struct recovery_info *next_rinfo;
> @@ -95,23 +100,44 @@ static inline bool node_is_gateway_only(void)
> return sys->this_node.nr_vnodes == 0;
> }
>
> +static inline int vinfo_idx(uint32_t epoch, int nr_nodes)
> +{
> + return epoch * SD_MAX_NODES + nr_nodes;
> +}
> +
> static struct vnode_info *rollback_vnode_info(uint32_t *epoch,
> + struct recovery_info *rinfo,
> struct vnode_info *cur)
> {
> - struct vnode_info *vinfo;
> + struct sd_node nodes[SD_MAX_NODES];
> + int nr_nodes, idx;
> + struct rb_root nroot = RB_ROOT;
> +
> rollback:
> *epoch -= 1;
> if (*epoch < last_gathered_epoch)
> return NULL;
>
> - vinfo = get_vnode_info_epoch(*epoch, cur);
> - if (!vinfo) {
> + nr_nodes = get_nodes_epoch(*epoch, cur, nodes, sizeof(nodes));
> + if (!nr_nodes) {
> /* We rollback in case we don't get a valid epoch */
> sd_alert("cannot get epoch %d", *epoch);
> sd_alert("clients may see old data");
> goto rollback;
> }
> - return vinfo;
> + idx = vinfo_idx(*epoch, nr_nodes);
> + /* double check */
> + if (rinfo->vinfo_array[idx] == NULL) {
> + sd_mutex_lock(&rinfo->vinfo_lock);
> + if (rinfo->vinfo_array[idx] == NULL) {
> + for (int i = 0; i < nr_nodes; i++)
> + rb_insert(&nroot, &nodes[i], rb, node_cmp);
> + rinfo->vinfo_array[idx] = alloc_vnode_info(&nroot);
> + }
> + sd_mutex_unlock(&rinfo->vinfo_lock);
> + }
> + refcount_inc(&(rinfo->vinfo_array[idx]->refcnt));
> + return rinfo->vinfo_array[idx];
> }
>
> /*
> @@ -202,7 +228,8 @@ again:
> break;
> default:
> rollback:
> - new_old = rollback_vnode_info(&tgt_epoch, rw->cur_vinfo);
> + new_old = rollback_vnode_info(&tgt_epoch, rw->rinfo,
> + rw->cur_vinfo);
> if (!new_old) {
> sd_err("can not read %"PRIx64" idx %d", oid, idx);
> free(buf);
> @@ -385,7 +412,8 @@ again:
> /* fall through */
> default:
> /* No luck, roll back to an older configuration and try again */
> - new_old = rollback_vnode_info(&tgt_epoch, rw->cur_vinfo);
> + new_old = rollback_vnode_info(&tgt_epoch, rw->rinfo,
> + rw->cur_vinfo);
> if (!new_old) {
> sd_err("can not recover oid %"PRIx64, oid);
> ret = -1;
> @@ -669,10 +697,19 @@ static void free_recovery_obj_work(struct recovery_obj_work *row)
>
> static void free_recovery_info(struct recovery_info *rinfo)
> {
> + int idx;
> +
> put_vnode_info(rinfo->cur_vinfo);
> put_vnode_info(rinfo->old_vinfo);
> free(rinfo->oids);
> free(rinfo->prio_oids);
> + for (int i = 0; i < rinfo->max_epoch; i++)
> + for (int j = 0; j < SD_MAX_NODES; j++) {
For node as vnode unit, SD_MAX_NODES is very large (>6000), so this loop will
burn cpu, no? I think this line will degrade the performance a bit.
Thanks
Yuan
More information about the sheepdog
mailing list