[sheepdog] [PATCH 1/2] sheep: introduce strict mode for write

Wed Dec 11 03:59:27 CET 2013

Why not add the check in gateway_forward_request()? I think we should
check the copy number before forwarding the request.

2013/12/10 Liu Yuan <namei.unix at gmail.com>

> We make sure we write the exact number of copies to honor the promise of
> redundancy in "strict mode". This means that once the targeted data has
> been written, it is redundant as promised and can withstand random node
> failures.
>
> For example, with a 4:2 policy, we need to write to at least 6 nodes (data
> strips plus parity strips). In non-strict mode, we allow the write to
> succeed as long as the data is fully written, i.e. with 4 nodes alive.
>
> Signed-off-by: Liu Yuan <namei.unix at gmail.com>
> ---
>  dog/cluster.c            |   10 +++++++++-
>  include/internal_proto.h |    2 ++
>  include/sheep.h          |    3 ++-
>  include/sheepdog_proto.h |    2 +-
>  sheep/ops.c              |    2 +-
>  sheep/request.c          |   34 +++++++++++++++++++++++++++++++---
>  6 files changed, 46 insertions(+), 7 deletions(-)
>
> diff --git a/dog/cluster.c b/dog/cluster.c
> index 611c91d..43df232 100644
> --- a/dog/cluster.c
> +++ b/dog/cluster.c
> @@ -21,6 +21,8 @@ static struct sd_option cluster_options[] = {
>         {'b', "store", true, "specify backend store"},
>         {'c', "copies", true, "specify the default data redundancy (number
> of copies)"},
>         {'f', "force", false, "do not prompt for confirmation"},
> +       {'t', "strict", false,
> +        "do not serve write request if number of nodes is not
> sufficient"},
>         {'s', "backend", false, "show backend store information"},
>         { 0, NULL, false, NULL },
>  };
> @@ -30,6 +32,7 @@ static struct cluster_cmd_data {
>         uint8_t copy_policy;
>         bool force;
>         bool show_store;
> +       bool strict;
>         char name[STORE_LEN];
>  } cluster_cmd_data;
>
> @@ -117,6 +120,8 @@ static int cluster_format(int argc, char **argv)
>                 pstrcpy(store_name, STORE_LEN, DEFAULT_STORE);
>         hdr.data_length = strlen(store_name) + 1;
>         hdr.flags |= SD_FLAG_CMD_WRITE;
> +       if (cluster_cmd_data.strict)
> +               hdr.cluster.flags |= SD_CLUSTER_FLAG_STRICT;
>
>         printf("using backend %s store\n", store_name);
>         ret = dog_exec_req(&sd_nid, &hdr, store_name);
> @@ -552,7 +557,7 @@ static int cluster_check(int argc, char **argv)
>  static struct subcommand cluster_cmd[] = {
>         {"info", NULL, "aprhs", "show cluster information",
>          NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},
> -       {"format", NULL, "bcaph", "create a Sheepdog store",
> +       {"format", NULL, "bctaph", "create a Sheepdog store",
>          NULL, CMD_NEED_NODELIST, cluster_format, cluster_options},
>         {"shutdown", NULL, "aph", "stop Sheepdog",
>          NULL, 0, cluster_shutdown, cluster_options},
> @@ -597,6 +602,9 @@ static int cluster_parser(int ch, const char *opt)
>         case 's':
>                 cluster_cmd_data.show_store = true;
>                 break;
> +       case 't':
> +               cluster_cmd_data.strict = true;
> +               break;
>         }
>
>         return 0;
> diff --git a/include/internal_proto.h b/include/internal_proto.h
> index b224c49..ac4e3f8 100644
> --- a/include/internal_proto.h
> +++ b/include/internal_proto.h
> @@ -126,6 +126,8 @@
>  #define SD_RES_CLUSTER_ERROR    0x91 /* Cluster driver error */
>  #define SD_RES_OBJ_TAKEN        0x92 /* Object ID is taken up */
>
> +#define SD_CLUSTER_FLAG_STRICT  0x0001 /* Strict mode for write */
> +
>  enum sd_status {
>         SD_STATUS_OK = 1,
>         SD_STATUS_WAIT,
> diff --git a/include/sheep.h b/include/sheep.h
> index 293e057..d460d54 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -160,7 +160,8 @@ static inline const char *sd_strerror(int err)
>                 [SD_RES_WAIT_FOR_FORMAT] = "Waiting for cluster to be
> formatted",
>                 [SD_RES_WAIT_FOR_JOIN] = "Waiting for other nodes to join
> cluster",
>                 [SD_RES_JOIN_FAILED] = "Node has failed to join cluster",
> -               [SD_RES_HALT] = "IO has halted as there are no living
> nodes",
> +               [SD_RES_HALT] =
> +                       "IO has halted as there are not enough living
> nodes",
>                 [SD_RES_READONLY] = "Object is read-only",
>
>                 /* from internal_proto.h */
> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
> index cb47e3f..366499e 100644
> --- a/include/sheepdog_proto.h
> +++ b/include/sheepdog_proto.h
> @@ -156,7 +156,7 @@ struct sd_req {
>                         uint64_t        ctime;
>                         uint8_t         copies;
>                         uint8_t         copy_policy;
> -                       uint8_t         reserved[2];
> +                       uint16_t        flags;
>                         uint32_t        tag;
>                 } cluster;
>                 struct {
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 75a2565..1e9bc1e 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -271,7 +271,7 @@ static int cluster_make_fs(const struct sd_req *req,
> struct sd_rsp *rsp,
>
>         sys->cinfo.nr_copies = req->cluster.copies;
>         sys->cinfo.copy_policy = req->cluster.copy_policy;
> -       sys->cinfo.flags = req->flags;
> +       sys->cinfo.flags = req->cluster.flags;
>         if (!sys->cinfo.nr_copies)
>                 sys->cinfo.nr_copies = SD_DEFAULT_COPIES;
>         sys->cinfo.ctime = req->cluster.ctime;
> diff --git a/sheep/request.c b/sheep/request.c
> index 5113fca..fd54253 100644
> --- a/sheep/request.c
> +++ b/sheep/request.c
> @@ -284,6 +284,22 @@ static void queue_peer_request(struct request *req)
>         queue_work(sys->io_wqueue, &req->work);
>  }
>
> +/*
> + * We make sure we write the exact number of copies to honor the promise
> of the
> + * redundancy for strict mode. This means that after writing of targeted
> data,
> + * they are redundant as promised and can withstand the random node
> failures.
> + *
> + * For example, with a 4:2 policy, we need at least write to 6 nodes with
> data
> + * strip and parity strips. For non-strict mode, we allow to write
> successfully
> + * only if the data are written fully with 4 nodes alive.
> + */
> +static bool has_enough_zones(struct request *req)
> +{
> +       uint64_t oid = req->rq.obj.oid;
> +
> +       return req->vinfo->nr_zones >=
> get_vdi_copy_number(oid_to_vid(oid));
> +}
> +
>  static void queue_gateway_request(struct request *req)
>  {
>         struct sd_req *hdr = &req->rq;
> @@ -310,13 +326,25 @@ static void queue_gateway_request(struct request
> *req)
>  queue_work:
>         if (RB_EMPTY_ROOT(&req->vinfo->vroot)) {
>                 sd_err("there is no living nodes");
> -               req->rp.result = SD_RES_HALT;
> -               put_request(req);
> -               return;
> +               goto end_request;
> +       }
> +       if (sys->cinfo.flags & SD_CLUSTER_FLAG_STRICT &&
> +           hdr->flags & SD_FLAG_CMD_WRITE &&
> +           !(hdr->flags & SD_FLAG_CMD_RECOVERY) &&
> +           !has_enough_zones(req)) {
> +               sd_err("not enough zones available");
> +               goto end_request;
>         }
> +
>         req->work.fn = do_process_work;
>         req->work.done = gateway_op_done;
>         queue_work(sys->gateway_wqueue, &req->work);
> +       return;
> +
> +end_request:
> +       req->rp.result = SD_RES_HALT;
> +       put_request(req);
> +       return;
>  }
>
>  static void queue_local_request(struct request *req)
> --
> 1.7.9.5
>
> --
> sheepdog mailing list
> sheepdog at lists.wpkg.org
> http://lists.wpkg.org/mailman/listinfo/sheepdog
>

-- 
--
Best Regards
Robin Dong
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wpkg.org/pipermail/sheepdog/attachments/20131211/fe641b75/attachment-0004.html>