[sheepdog] [PATCH RFC 1/2] sheep: enhance STAT_RECOVERY for prividing information of recovery progress
Liu Yuan
namei.unix at gmail.com
Mon Jul 29 10:02:30 CEST 2013
On Mon, Jul 29, 2013 at 04:39:26PM +0900, Hitoshi Mitake wrote:
> Current SD_OP_STAT_RECOVERY only provides an information that
> indicates the node is doing recovery or not. For providing more useful
> information about progress of recovery, this patch enhances this
> request.
>
> The enhanced SD_OP_STAT_RECOVERY returns the information with struct
> recovery_progress. The struct contains these information:
> 1. state of recovery. With this information, collie can know which
> phase of progress sheep is in.
> 2. Number of copied object during a recovery process.
> 3. Number of entire objects which should be copied during a recovery
> process.
>
> Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
> ---
> include/internal_proto.h | 12 ++++++++++++
> sheep/ops.c | 11 ++++++++---
> sheep/recovery.c | 33 ++++++++++++++++++++++++---------
> sheep/sheep_priv.h | 1 +
> 4 files changed, 45 insertions(+), 12 deletions(-)
>
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index 0463eae..6d7a0e9 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -184,6 +184,18 @@ struct sd_md_info {
> int nr;
> };
>
> +enum rw_state {
> + RW_PREPARE_LIST, /* the recovery thread is preparing object list */
> + RW_RECOVER_OBJ, /* the thread is recoering objects */
> + RW_NOTIFY_COMPLETION, /* the thread is notifying recovery completion */
> +};
> +
> +struct recovery_progress {
> + uint32_t state; /* actual type is enum rw_state */
enum rw_state state; then you don't need comment.
> + uint32_t nr_recovered_objects;
> + uint32_t nr_entire_checked_objects;
> +};
it's recovery information, so better name it as recovery_info. And
nr_recovered_objects --> finished_count
nr_entire_checked_objects --> total
> static inline __attribute__((used)) void __sd_epoch_format_build_bug_ons(void)
> {
> /* never called, only for checking BUILD_BUG_ON()s */
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 5d7686c..1436664 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -390,10 +390,15 @@ static int local_stat_sheep(struct request *req)
> static int local_stat_recovery(const struct sd_req *req, struct sd_rsp *rsp,
> void *data)
> {
> - if (node_in_recovery())
> - return SD_RES_NODE_IN_RECOVERY;
> + if (!node_in_recovery())
> + return SD_RES_SUCCESS;
>
> - return SD_RES_SUCCESS;
> + if (req->data_length == sizeof(struct recovery_progress)) {
> + fill_recovery_progress(data);
> + rsp->data_length = sizeof(struct recovery_progress);
> + }
> +
> + return SD_RES_NODE_IN_RECOVERY;
> }
>
> static int local_stat_cluster(struct request *req)
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index ea3101a..69b07cd 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -11,12 +11,6 @@
>
> #include "sheep_priv.h"
>
> -enum rw_state {
> - RW_PREPARE_LIST, /* the recovery thread is preparing object list */
> - RW_RECOVER_OBJ, /* the thread is recoering objects */
> - RW_NOTIFY_COMPLETION, /* the thread is notifying recovery completion */
> -};
> -
> /* base structure for the recovery thread */
> struct recovery_work {
> uint32_t epoch;
> @@ -86,6 +80,9 @@ static void queue_recovery_work(struct recovery_info *rinfo);
> #define DEFAULT_LIST_BUFFER_SIZE (UINT64_C(1) << 22)
> static size_t list_buffer_size = DEFAULT_LIST_BUFFER_SIZE;
>
> +static enum rw_state state;
> +static uint32_t nr_recovered_objects, nr_entire_checked_objects;
> +
> static int obj_cmp(const uint64_t *oid1, const uint64_t *oid2)
> {
> const uint64_t hval1 = fnv_64a_buf(oid1, sizeof(*oid1), FNV1A_64_INIT);
> @@ -316,7 +313,7 @@ static void recover_object_work(struct work *work)
>
> if (sd_store->exist(oid)) {
> sd_dprintf("the object is already recovered");
> - return;
> + goto end;
> }
>
> /* find object in the stale directory */
> @@ -332,6 +329,9 @@ static void recover_object_work(struct work *work)
> ret = do_recover_object(row);
> if (ret < 0)
> sd_eprintf("failed to recover object %"PRIx64, oid);
> +
> +end:
It is better use 'out' lable as other code.
> + uatomic_inc(&nr_recovered_objects);
> }
>
> bool node_in_recovery(void)
> @@ -499,12 +499,12 @@ static void notify_recovery_completion_main(struct work *work)
> static inline void finish_recovery(struct recovery_info *rinfo)
> {
> uint32_t recovered_epoch = rinfo->epoch;
> - main_thread_set(current_rinfo, NULL);
>
> + main_thread_set(current_rinfo, NULL);
> wakeup_all_requests();
>
> if (rinfo->notify_complete) {
> - rinfo->state = RW_NOTIFY_COMPLETION;
> + state = rinfo->state = RW_NOTIFY_COMPLETION;
> queue_recovery_work(rinfo);
> }
>
> @@ -668,6 +668,9 @@ static void finish_object_list(struct work *work)
> return;
> }
>
> + state = RW_RECOVER_OBJ;
> + uatomic_set(&nr_entire_checked_objects, rinfo->count);
> +
> recover_next_object(rinfo);
> return;
> }
> @@ -853,6 +856,10 @@ static void queue_recovery_work(struct recovery_info *rinfo)
> rlw = xzalloc(sizeof(*rlw));
> rlw->oids = xmalloc(list_buffer_size);
>
> + state = RW_PREPARE_LIST;
> + uatomic_set(&nr_recovered_objects, 0);
> + uatomic_set(&nr_entire_checked_objects, 0);
> +
> rw = &rlw->base;
> rw->work.fn = prepare_object_list;
> rw->work.done = finish_object_list;
> @@ -882,3 +889,11 @@ static void queue_recovery_work(struct recovery_info *rinfo)
>
> queue_work(sys->recovery_wqueue, &rw->work);
> }
> +
> +void fill_recovery_progress(struct recovery_progress *prog)
> +{
> + prog->state = state;
> + prog->nr_recovered_objects = uatomic_read(&nr_recovered_objects);
> + prog->nr_entire_checked_objects =
> + uatomic_read(&nr_entire_checked_objects);
> +}
just ->nr_recovered_objects = rinfo->done isn't enough?
the same goes for nr_entire_checked_objects.
global variables aren't necessary.
Better name the function as get_recovery_info()
Thanks
Yuan
More information about the sheepdog
mailing list