[Sheepdog] [PATCH] sheep: fix race on sys->pending_list

Wed May 9 04:23:09 CEST 2012

On Tue, May 8, 2012 at 11:02 PM, Christoph Hellwig <hch at infradead.org> wrote:
> On Tue, May 08, 2012 at 08:08:20PM +0800, Yunkai Zhang wrote:
>> > Also is there any good reason to have the wrapper here? ?The comment
>> > doesn't seem very helpful anymore either.
>> >
>> I just want to use this function's name so that it can keep consistent
>> with work.done, maybe I can assign do_cluster_request to work.done
>> directly?
>
> See the below patch for what I meant.  Note the uncommented assert
> in do_cluster_op which would fail if it was enabled.
>
>
> ---
>  sheep/group.c      |   10 ++++++----
>  sheep/sdnet.c      |    8 --------
>  sheep/sheep_priv.h |    1 -
>  3 files changed, 6 insertions(+), 13 deletions(-)
>
> Index: sheepdog/sheep/group.c
> ===================================================================
> --- sheepdog.orig/sheep/group.c 2012-05-08 16:48:20.435975071 +0200
> +++ sheepdog/sheep/group.c      2012-05-08 16:51:33.327973227 +0200
> @@ -238,6 +238,12 @@ int get_nr_copies(struct vnode_info *vno
>        return min(vnode_info->nr_zones, sys->nr_copies);
>  }
>
> +/*
> + * Perform a blocked cluster operation.
> + *
> + * Must run in the main thread as it access unlocked state like
> + * sys->pending_list.
> + */
>  static void do_cluster_op(void *arg)
>  {
>        struct vdi_op_message *msg = arg;
> @@ -245,6 +251,8 @@ static void do_cluster_op(void *arg)
>        struct request *req;
>        void *data;
>
> +//     assert(is_main_thread());
> +
>        req = list_first_entry(&sys->pending_list, struct request, pending_list);
>
>        if (has_process_main(req->op))
> @@ -258,13 +266,21 @@ static void do_cluster_op(void *arg)
>        msg->rsp.result = ret;
>  }
>
> -void do_cluster_request(struct work *work)
> +/*
> + * Execute a cluster operation by letting the cluster driver send it to all
> + * nodes in the cluster.
> + *
> + * Must run in the main thread as it access unlocked state like
> + * sys->pending_list.
> + */
> +static void do_cluster_request(struct request *req)
>  {
> -       struct request *req = container_of(work, struct request, work);
> -       struct sd_req *hdr = (struct sd_req *)&req->rq;
> +       struct sd_req *hdr = &req->rq;
>        struct vdi_op_message *msg;
>        size_t size;
>
> +       assert(is_main_thread());
> +
>        eprintf("%p %x\n", req, hdr->opcode);
>
>        if (has_process_main(req->op))
> @@ -671,11 +687,19 @@ static void __sd_notify_done(struct even
>        req_done(req);
>  }
>
> +/*
> + * Pass on a notification message from the cluster driver.
> + *
> + * Must run in the main thread as it access unlocked state like
> + * sys->pending_list.
> + */
>  void sd_notify_handler(struct sd_node *sender, void *msg, size_t msg_len)
>  {
>        struct event_struct *cevent;
>        struct work_notify *w;
>
> +       assert(is_main_thread());
> +
>        dprintf("size: %zd, from: %s\n", msg_len, node_to_str(sender));
>
>        w = zalloc(sizeof(*w));
> @@ -1085,8 +1109,16 @@ static void process_request_queue(void)
>                                queue_work(sys->io_wqueue, &req->work);
>                        else
>                                queue_work(sys->gateway_wqueue, &req->work);
> -               } else /* (is_cluster_op(req->op) || is_local_op(req->op)) */
> +               } else if (is_cluster_op(req->op)) {
> +                       /*
> +                        * Cluster requests are handed off to the cluster driver
> +                        * directly from the main thread.  It's the cluster
> +                        * drivers job to ensure we avoid blocking on I/O here.
> +                        */
> +                       do_cluster_request(req);
> +               } else { /* is_local_op(req->op) */
>                        queue_work(sys->io_wqueue, &req->work);
> +               }
>        }
>  }
>
> Index: sheepdog/sheep/sdnet.c
> ===================================================================
> --- sheepdog.orig/sheep/sdnet.c 2012-05-08 16:48:20.435975071 +0200
> +++ sheepdog/sheep/sdnet.c      2012-05-08 16:50:33.583973798 +0200
> @@ -171,11 +171,6 @@ static void local_op_done(struct work *w
>        req_done(req);
>  }
>
> -static void cluster_op_done(struct work *work)
> -{
> -       /* request is forwarded to cpg group */
> -}
> -
>  static void do_local_request(struct work *work)
>  {
>        struct request *req = container_of(work, struct request, work);
> @@ -322,8 +317,7 @@ static void queue_request(struct request
>                req->work.fn = do_local_request;
>                req->work.done = local_op_done;
>        } else if (is_cluster_op(req->op)) {
> -               req->work.fn = do_cluster_request;
> -               req->work.done = cluster_op_done;
> +               /* directly executed in the main thread */;
>        } else {
>                eprintf("unknown operation %d\n", hdr->opcode);
>                rsp->result = SD_RES_SYSTEM_ERROR;
> Index: sheepdog/sheep/sheep_priv.h
> ===================================================================
> --- sheepdog.orig/sheep/sheep_priv.h    2012-05-08 16:48:20.435975071 +0200
> +++ sheepdog/sheep/sheep_priv.h 2012-05-08 16:48:42.647974855 +0200
> @@ -274,7 +274,6 @@ int forward_write_obj_req(struct request
>
>  int read_epoch(uint32_t *epoch, uint64_t *ctime,
>               struct sd_node *entries, int *nr_entries);
> -void do_cluster_request(struct work *work);
>
>  int update_epoch_store(uint32_t epoch);
>  int update_epoch_log(uint32_t epoch);

Well, it should be a good solution.

Can you give a patch?

-- 
Yunkai Zhang
Work at Taobao