[sheepdog] [PATCH 5/8] make IO request to wait when the requested object is in recovery

Yibin Shen zituan at taobao.com
Tue May 22 10:40:06 CEST 2012


If a socket times out for some reason (for example, because the request was
blocked by the target node for too long), the source node will get an
SD_RES_NETWORK_ERROR error, and io_op_done() will then re-send the request.
We should consider this case carefully.

On Tue, May 22, 2012 at 10:51 AM, levin li <levin108 at gmail.com> wrote:
> From: levin li <xingke.lwp at taobao.com>
>
> When an object requested is in recovery, we should put it into
> the wait_obj_queue to make the request wait until the object is
> recovered by the recovery work.
>
> Signed-off-by: levin li <xingke.lwp at taobao.com>
> ---
>  sheep/group.c      |    1 +
>  sheep/recovery.c   |    5 ++++-
>  sheep/sdnet.c      |   23 +++++++++++++++++++----
>  sheep/sheep_priv.h |    2 ++
>  4 files changed, 26 insertions(+), 5 deletions(-)
>
> diff --git a/sheep/group.c b/sheep/group.c
> index 63742a2..35382f7 100644
> --- a/sheep/group.c
> +++ b/sheep/group.c
> @@ -1380,6 +1380,7 @@ int create_cluster(int port, int64_t zone, int nr_vnodes)
>        INIT_LIST_HEAD(&sys->event_queue);
>        INIT_LIST_HEAD(&sys->wait_epoch_queue);
>        INIT_LIST_HEAD(&sys->wait_rw_queue);
> +       INIT_LIST_HEAD(&sys->wait_obj_queue);
>
>        ret = send_join_request(&sys->this_node);
>        if (ret != 0)
> diff --git a/sheep/recovery.c b/sheep/recovery.c
> index 0d05661..179937b 100644
> --- a/sheep/recovery.c
> +++ b/sheep/recovery.c
> @@ -598,7 +598,7 @@ static void do_recover_main(struct work *work)
>        struct recovery_work *rw = container_of(work, struct recovery_work, work);
>        struct object_rb_entry *entry;
>        struct rb_node *node;
> -       uint64_t oid = 0;
> +       uint64_t oid = 0, recovered_oid = rw->oid_to_recovery;
>
>  again:
>        if (rw->prior_count == 0) {
> @@ -627,6 +627,9 @@ again:
>
>        rw->oid_to_recovery = oid;
>
> +       if (recovered_oid)
> +               resume_retry_requests(recovered_oid);
> +
>        if (rw->retry && !next_rw) {
>                rw->retry = 0;
>
> diff --git a/sheep/sdnet.c b/sheep/sdnet.c
> index 565625e..b0724be 100644
> --- a/sheep/sdnet.c
> +++ b/sheep/sdnet.c
> @@ -231,10 +231,8 @@ static int check_request(struct request *req)
>                                list_add_tail(&req->request_list,
>                                                &sys->wait_rw_queue);
>                        else
> -                               req->rp.result = SD_RES_NEW_NODE_VER;
> -                               sys->nr_outstanding_io++;
> -                               req->work.done(&req->work);
> -                       }
> +                               list_add_tail(&req->request_list,
> +                                               &sys->wait_obj_queue);
>                } else {
>                        /* Gateway request */
>                        list_add_tail(&req->request_list, &sys->req_wait_for_obj_list);
> @@ -292,6 +290,23 @@ void resume_wait_epoch_requests(void)
>        }
>  }
>
> +void resume_retry_requests(uint64_t oid)
> +{
> +       struct request *req, *t;
> +
> +       list_for_each_entry_safe(req, t, &sys->wait_obj_queue,
> +                       request_list) {
> +               /* the object requested by a pending request has been
> +                * recovered, notify the pending request. */
> +               if (req->local_oid == oid) {
> +                       dprintf("retry %" PRIx64 "\n", req->local_oid);
> +                       list_del(&req->request_list);
> +                       list_add_tail(&req->request_list, &sys->request_queue);
> +                       process_request_event_queues();
> +               }
> +       }
> +}
> +
>  static void queue_request(struct request *req)
>  {
>        struct sd_req *hdr = &req->rq;
> diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
> index 5e804c4..8a5a2dc 100644
> --- a/sheep/sheep_priv.h
> +++ b/sheep/sheep_priv.h
> @@ -138,6 +138,7 @@ struct cluster_info {
>        struct list_head event_queue;
>        struct list_head wait_epoch_queue;
>        struct list_head wait_rw_queue;
> +       struct list_head wait_obj_queue;
>        struct event_struct *cur_cevent;
>        int nr_outstanding_io;
>        int nr_outstanding_reqs;
> @@ -266,6 +267,7 @@ int is_access_to_busy_objects(uint64_t oid);
>
>  void resume_pending_requests(void);
>  void resume_wait_epoch_requests(void);
> +void resume_retry_requests(uint64_t oid);
>
>  int create_cluster(int port, int64_t zone, int nr_vnodes);
>  int leave_cluster(void);
> --
> 1.7.10
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog

________________________________

This email (including any attachments) is confidential and may be legally privileged. If you received this email in error, please delete it immediately and do not copy it or use it for any purpose or disclose its contents to any other person. Thank you.

本电邮(包括任何附件)可能含有机密资料并受法律保护。如您不是正确的收件人,请您立即删除本邮件。请不要将本电邮进行复制并用作任何其他用途、或透露本邮件之内容。谢谢。



More information about the sheepdog mailing list