[sheepdog] [PATCH v1 5/5] sheep: cache vnode_info when doing recovery
Robin Dong
robin.k.dong at gmail.com
Tue Apr 29 04:53:45 CEST 2014
From: Robin Dong <sanbai at taobao.com>
When sheepdog is doing recovery on some low-performance machines, CPU usage is
very high. After using perf tools to find the performance hot spots in the
sheep daemon, we found that the "alloc_vnode_info()" function costs lots
of CPU cycles, because rollback_vnode_info() rebuilds the vnode_info
by calling alloc_vnode_info() too frequently.
The solution is to cache the result of alloc_vnode_info() for each specific
'epoch' and 'nr_nodes' pair in the recovery context.
Signed-off-by: Robin Dong <sanbai at taobao.com>
---
sheep/group.c | 12 ++++++++++++
sheep/recovery.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
sheep/sheep_priv.h | 4 ++++
3 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/sheep/group.c b/sheep/group.c
index 91cc35a..43f215f 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -182,6 +182,18 @@ struct vnode_info *get_vnode_info_epoch(uint32_t epoch,
return alloc_vnode_info(&nroot);
}
+/*
+ * Read the node list recorded for 'epoch' into 'nodes'.
+ *
+ * The local epoch log is tried first; if that read reports an error
+ * (negative return), the list is requested through
+ * epoch_log_read_remote() using 'cur_vinfo'.
+ *
+ * 'len' is the size of the 'nodes' buffer in bytes.  The return value is
+ * whatever the reader that was used last reported.
+ */
+int get_nodes_epoch(uint32_t epoch, struct vnode_info *cur_vinfo,
+		    struct sd_node *nodes, int len)
+{
+	int nr_nodes;
+
+	/*
+	 * 'nodes' is a pointer in this function, so sizeof(nodes) would be
+	 * the size of a pointer, not of the caller's buffer; use 'len'.
+	 */
+	nr_nodes = epoch_log_read(epoch, nodes, len);
+	if (nr_nodes < 0)
+		nr_nodes = epoch_log_read_remote(epoch, nodes, len,
+						 NULL, cur_vinfo);
+	return nr_nodes;
+}
+
int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 6008a0b..3616f0a 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -71,6 +71,10 @@ struct recovery_info {
struct vnode_info *old_vinfo;
struct vnode_info *cur_vinfo;
+
+ int max_epoch;
+ struct vnode_info **vinfo_array;
+ struct sd_mutex vinfo_lock;
};
static struct recovery_info *next_rinfo;
@@ -97,23 +101,44 @@ static inline bool node_is_gateway_only(void)
return sys->this_node.nr_vnodes == 0;
}
+/*
+ * Flat array index for the (epoch, nr_nodes) pair: every epoch owns a run
+ * of SD_MAX_NODES consecutive slots and nr_nodes selects the slot in it.
+ */
+static inline int vinfo_idx(uint32_t epoch, int nr_nodes)
+{
+	return nr_nodes + epoch * SD_MAX_NODES;
+}
+
+/*
+ * Step *epoch backwards until a node list can be obtained for it and
+ * return the vnode_info built from that list, with its refcnt
+ * incremented for the caller.  Returns NULL once *epoch drops below
+ * last_gathered_epoch.
+ *
+ * The vnode_info is kept in rinfo->vinfo_array, keyed by
+ * (epoch, nr_nodes), so alloc_vnode_info() runs only when the slot for
+ * that key is still empty.
+ */
static struct vnode_info *rollback_vnode_info(uint32_t *epoch,
struct vnode_info *cur)
{
- struct vnode_info *vinfo;
+	struct recovery_info *rinfo = main_thread_get(current_rinfo);
+	struct sd_node nodes[SD_MAX_NODES];
+	int nr_nodes, idx;
+	struct rb_root nroot = RB_ROOT;
+
rollback:
*epoch -= 1;
if (*epoch < last_gathered_epoch)
return NULL;
- vinfo = get_vnode_info_epoch(*epoch, cur);
- if (!vinfo) {
+	nr_nodes = get_nodes_epoch(*epoch, cur, nodes, sizeof(nodes));
+	/* a negative count must not reach vinfo_idx(); treat it like zero */
+	if (nr_nodes <= 0) {
/* We rollback in case we don't get a valid epoch */
sd_alert("cannot get epoch %d", *epoch);
sd_alert("clients may see old data");
goto rollback;
}
- return vinfo;
+	idx = vinfo_idx(*epoch, nr_nodes);
+	/*
+	 * Look up the slot and fill it on a miss with the lock held the
+	 * whole time, so the slot is never read while it may be written.
+	 */
+	sd_mutex_lock(&rinfo->vinfo_lock);
+	if (rinfo->vinfo_array[idx] == NULL) {
+		for (int i = 0; i < nr_nodes; i++)
+			rb_insert(&nroot, &nodes[i], rb, node_cmp);
+		rinfo->vinfo_array[idx] = alloc_vnode_info(&nroot);
+	}
+	refcount_inc(&(rinfo->vinfo_array[idx]->refcnt));
+	sd_mutex_unlock(&rinfo->vinfo_lock);
+	/* a filled slot is not written again here, so this read is stable */
+	return rinfo->vinfo_array[idx];
}
/*
@@ -671,10 +696,19 @@ static void free_recovery_obj_work(struct recovery_obj_work *row)
+/*
+ * Tear down a recovery_info: hand its vnode_info pointers (current, old
+ * and every slot of vinfo_array) to put_vnode_info(), free the oid
+ * arrays and the slot array, destroy the lock, then free rinfo itself.
+ */
static void free_recovery_info(struct recovery_info *rinfo)
{
+	/* vinfo_array holds SD_MAX_NODES slots for each of max_epoch epochs */
+	int nr_slots = rinfo->max_epoch * SD_MAX_NODES;
+
put_vnode_info(rinfo->cur_vinfo);
put_vnode_info(rinfo->old_vinfo);
free(rinfo->oids);
free(rinfo->prio_oids);
+	/* ascending index order, same as walking epochs then node counts */
+	for (int slot = 0; slot < nr_slots; slot++)
+		put_vnode_info(rinfo->vinfo_array[slot]);
+	free(rinfo->vinfo_array);
+	sd_destroy_mutex(&rinfo->vinfo_lock);
free(rinfo);
}
@@ -1071,6 +1105,10 @@ int start_recovery(struct vnode_info *cur_vinfo, struct vnode_info *old_vinfo,
rinfo->tgt_epoch = epoch_lifted ? sys->cinfo.epoch - 1 :
sys->cinfo.epoch;
rinfo->count = 0;
+ rinfo->max_epoch = sys->cinfo.epoch;
+ rinfo->vinfo_array = xzalloc(sizeof(struct vnode_info *) *
+ rinfo->max_epoch * SD_MAX_NODES);
+ sd_init_mutex(&rinfo->vinfo_lock);
if (epoch_lifted)
rinfo->notify_complete = true; /* Reweight or node recovery */
else
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 693171c..f405b75 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -346,12 +346,16 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len, uint32_t vid,
int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
void *data);
+void reset_vinfo_array(void);
struct vnode_info *grab_vnode_info(struct vnode_info *vnode_info);
struct vnode_info *get_vnode_info(void);
void put_vnode_info(struct vnode_info *vinfo);
struct vnode_info *alloc_vnode_info(const struct rb_root *);
struct vnode_info *get_vnode_info_epoch(uint32_t epoch,
struct vnode_info *cur_vinfo);
+int get_nodes_epoch(uint32_t epoch, struct vnode_info *cur_vinfo,
+ struct sd_node *nodes, int len);
+
void wait_get_vdis_done(void);
int get_nr_copies(struct vnode_info *vnode_info);
--
1.7.12.4
More information about the sheepdog
mailing list