Currently we can easily get into a situation where we can't read objects after losing a node in an offline cluster and then doing a manual recovery. To fix this, call start_recovery from cluster_manual_recover. Also move get_vnodes_from_epoch into group.c and rename it to fit with the rest of the vnode_info functions now that it is used outside of recovery.c. Signed-off-by: Christoph Hellwig <hch at lst.de> diff --git a/sheep/group.c b/sheep/group.c index c2679f2..a83590c 100644 --- a/sheep/group.c +++ b/sheep/group.c @@ -201,7 +201,8 @@ void oid_to_vnodes(struct vnode_info *vnode_info, uint64_t oid, int nr_copies, } } -struct vnode_info *alloc_vnode_info(struct sd_node *nodes, size_t nr_nodes) +static struct vnode_info *alloc_vnode_info(struct sd_node *nodes, + size_t nr_nodes) { struct vnode_info *vnode_info; @@ -218,6 +219,23 @@ struct vnode_info *alloc_vnode_info(struct sd_node *nodes, size_t nr_nodes) return vnode_info; } +struct vnode_info *get_vnode_info_epoch(uint32_t epoch) +{ + struct sd_node nodes[SD_MAX_NODES]; + int nr_nodes; + + nr_nodes = epoch_log_read_nr(epoch, (void *)nodes, sizeof(nodes)); + if (nr_nodes < 0) { + nr_nodes = epoch_log_read_remote(epoch, (void *)nodes, + sizeof(nodes)); + if (nr_nodes == 0) + return NULL; + nr_nodes /= sizeof(nodes[0]); + } + + return alloc_vnode_info(nodes, nr_nodes); +} + int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp, void *data) { diff --git a/sheep/ops.c b/sheep/ops.c index 27ca07d..89870a1 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -420,6 +420,7 @@ static int local_get_epoch(struct request *req) static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp, void *data) { + struct vnode_info *old_vnode_info, *vnode_info; int ret = SD_RES_SUCCESS; uint8_t c; uint16_t f; @@ -429,21 +430,25 @@ static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp, * 2) some nodes are physically down (same epoch condition). 
* In both case, the nodes(s) stat is WAIT_FOR_JOIN. */ - if (!sys_stat_wait_join()) { - ret = SD_RES_MANUAL_RECOVER; - goto out; - } + if (!sys_stat_wait_join()) + return SD_RES_MANUAL_RECOVER; ret = get_cluster_copies(&c); if (ret) - goto out; + return ret; ret = get_cluster_flags(&f); if (ret) - goto out; + return ret; sys->nr_copies = c; sys->flags = f; + old_vnode_info = get_vnode_info_epoch(sys->epoch); + if (!old_vnode_info) { + eprintf("cannot get vnode info for epoch %d\n", sys->epoch); + return SD_RES_EIO; + } + sys->epoch++; /* some nodes are left, so we get a new epoch */ ret = log_current_epoch(); if (ret) { @@ -456,7 +461,12 @@ static int cluster_manual_recover(const struct sd_req *req, struct sd_rsp *rsp, sys_stat_set(SD_STATUS_OK); else sys_stat_set(SD_STATUS_HALT); + + vnode_info = get_vnode_info(); + start_recovery(vnode_info, old_vnode_info); + put_vnode_info(vnode_info); out: + put_vnode_info(old_vnode_info); return ret; } diff --git a/sheep/recovery.c b/sheep/recovery.c index 9b715ea..64309df 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -55,23 +55,6 @@ static int obj_cmp(const void *oid1, const void *oid2) return 0; } -static struct vnode_info *get_vnodes_from_epoch(uint32_t epoch) -{ - struct sd_node nodes[SD_MAX_NODES]; - int nr_nodes; - - nr_nodes = epoch_log_read_nr(epoch, (void *)nodes, sizeof(nodes)); - if (nr_nodes < 0) { - nr_nodes = epoch_log_read_remote(epoch, (void *)nodes, - sizeof(nodes)); - if (nr_nodes == 0) - return NULL; - nr_nodes /= sizeof(nodes[0]); - } - - return alloc_vnode_info(nodes, nr_nodes); -} - static int recover_object_from_replica(uint64_t oid, struct sd_vnode *entry, uint32_t epoch, uint32_t tgt_epoch) @@ -225,7 +208,7 @@ again: goto err; } - new_old = get_vnodes_from_epoch(tgt_epoch); + new_old = get_vnode_info_epoch(tgt_epoch); if (!new_old) { ret = -1; goto err; diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 60432c7..45d3852 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ 
-232,10 +232,10 @@ int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp, void *data); bool have_enough_zones(void); -struct vnode_info *alloc_vnode_info(struct sd_node *nodes, size_t nr_nodes); struct vnode_info *grab_vnode_info(struct vnode_info *vnode_info); struct vnode_info *get_vnode_info(void); void put_vnode_info(struct vnode_info *vnodes); +struct vnode_info *get_vnode_info_epoch(uint32_t epoch); struct sd_vnode *oid_to_vnode(struct vnode_info *vnode_info, uint64_t oid, int copy_idx); |