<div dir="ltr">Why not add the check in gateway_forward_request()? I think we should check the copy number before forwarding the request.</div><div class="gmail_extra"><br><br><div class="gmail_quote">2013/12/10 Liu Yuan <span dir="ltr">&lt;<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>&gt;</span><br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">We make sure we write the exact number of copies to honor the promise of the<br>

redundancy for "strict mode". This means that after writing of targeted data,<br>

they are redundant as promised and can withstand the random node failures.<br>

<br>

For example, with a 4:2 policy, we need to write to at least 6 nodes with data<br>

strips and parity strips. For non-strict mode, we allow a write to succeed<br>

as long as the data are fully written with 4 nodes alive.<br>

<br>

Signed-off-by: Liu Yuan &lt;<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>&gt;<br>

---<br>

 dog/cluster.c            |   10 +++++++++-<br>

 include/internal_proto.h |    2 ++<br>

 include/sheep.h          |    3 ++-<br>

 include/sheepdog_proto.h |    2 +-<br>

 sheep/ops.c              |    2 +-<br>

 sheep/request.c          |   34 +++++++++++++++++++++++++++++++---<br>

 6 files changed, 46 insertions(+), 7 deletions(-)<br>

<br>

diff --git a/dog/cluster.c b/dog/cluster.c<br>

index 611c91d..43df232 100644<br>

--- a/dog/cluster.c<br>

+++ b/dog/cluster.c<br>

@@ -21,6 +21,8 @@ static struct sd_option cluster_options[] = {<br>

        {'b', "store", true, "specify backend store"},<br>

        {'c', "copies", true, "specify the default data redundancy (number of copies)"},<br>

        {'f', "force", false, "do not prompt for confirmation"},<br>

+       {'t', "strict", false,<br>

+        "do not serve write request if number of nodes is not sufficient"},<br>

        {'s', "backend", false, "show backend store information"},<br>

        { 0, NULL, false, NULL },<br>

 };<br>

@@ -30,6 +32,7 @@ static struct cluster_cmd_data {<br>

        uint8_t copy_policy;<br>

        bool force;<br>

        bool show_store;<br>

+       bool strict;<br>

        char name[STORE_LEN];<br>

 } cluster_cmd_data;<br>

<br>

@@ -117,6 +120,8 @@ static int cluster_format(int argc, char **argv)<br>

                pstrcpy(store_name, STORE_LEN, DEFAULT_STORE);<br>

        hdr.data_length = strlen(store_name) + 1;<br>

        hdr.flags |= SD_FLAG_CMD_WRITE;<br>

+       if (cluster_cmd_data.strict)<br>

+               hdr.cluster.flags |= SD_CLUSTER_FLAG_STRICT;<br>

<br>

        printf("using backend %s store\n", store_name);<br>

        ret = dog_exec_req(&sd_nid, &hdr, store_name);<br>

@@ -552,7 +557,7 @@ static int cluster_check(int argc, char **argv)<br>

 static struct subcommand cluster_cmd[] = {<br>

        {"info", NULL, "aprhs", "show cluster information",<br>

         NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},<br>

-       {"format", NULL, "bcaph", "create a Sheepdog store",<br>

+       {"format", NULL, "bctaph", "create a Sheepdog store",<br>

         NULL, CMD_NEED_NODELIST, cluster_format, cluster_options},<br>

        {"shutdown", NULL, "aph", "stop Sheepdog",<br>

         NULL, 0, cluster_shutdown, cluster_options},<br>

@@ -597,6 +602,9 @@ static int cluster_parser(int ch, const char *opt)<br>

        case 's':<br>

                cluster_cmd_data.show_store = true;<br>

                break;<br>

+       case 't':<br>

+               cluster_cmd_data.strict = true;<br>

+               break;<br>

        }<br>

<br>

        return 0;<br>

diff --git a/include/internal_proto.h b/include/internal_proto.h<br>

index b224c49..ac4e3f8 100644<br>

--- a/include/internal_proto.h<br>

+++ b/include/internal_proto.h<br>

@@ -126,6 +126,8 @@<br>

 #define SD_RES_CLUSTER_ERROR    0x91 /* Cluster driver error */<br>

 #define SD_RES_OBJ_TAKEN        0x92 /* Object ID is taken up */<br>

<br>

+#define SD_CLUSTER_FLAG_STRICT  0x0001 /* Strict mode for write */<br>

+<br>

 enum sd_status {<br>

        SD_STATUS_OK = 1,<br>

        SD_STATUS_WAIT,<br>

diff --git a/include/sheep.h b/include/sheep.h<br>

index 293e057..d460d54 100644<br>

--- a/include/sheep.h<br>

+++ b/include/sheep.h<br>

@@ -160,7 +160,8 @@ static inline const char *sd_strerror(int err)<br>

                [SD_RES_WAIT_FOR_FORMAT] = "Waiting for cluster to be formatted",<br>

                [SD_RES_WAIT_FOR_JOIN] = "Waiting for other nodes to join cluster",<br>

                [SD_RES_JOIN_FAILED] = "Node has failed to join cluster",<br>

-               [SD_RES_HALT] = "IO has halted as there are no living nodes",<br>

+               [SD_RES_HALT] =<br>

+                       "IO has halted as there are not enough living nodes",<br>

                [SD_RES_READONLY] = "Object is read-only",<br>

<br>

                /* from internal_proto.h */<br>

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h<br>

index cb47e3f..366499e 100644<br>

--- a/include/sheepdog_proto.h<br>

+++ b/include/sheepdog_proto.h<br>

@@ -156,7 +156,7 @@ struct sd_req {<br>

                        uint64_t        ctime;<br>

                        uint8_t         copies;<br>

                        uint8_t         copy_policy;<br>

-                       uint8_t         reserved[2];<br>

+                       uint16_t        flags;<br>

                        uint32_t        tag;<br>

                } cluster;<br>

                struct {<br>

diff --git a/sheep/ops.c b/sheep/ops.c<br>

index 75a2565..1e9bc1e 100644<br>

--- a/sheep/ops.c<br>

+++ b/sheep/ops.c<br>

@@ -271,7 +271,7 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,<br>

<br>

        sys->cinfo.nr_copies = req->cluster.copies;<br>

        sys->cinfo.copy_policy = req->cluster.copy_policy;<br>

-       sys->cinfo.flags = req->flags;<br>

+       sys->cinfo.flags = req->cluster.flags;<br>

        if (!sys->cinfo.nr_copies)<br>

                sys->cinfo.nr_copies = SD_DEFAULT_COPIES;<br>

        sys->cinfo.ctime = req->cluster.ctime;<br>

diff --git a/sheep/request.c b/sheep/request.c<br>

index 5113fca..fd54253 100644<br>

--- a/sheep/request.c<br>

+++ b/sheep/request.c<br>

@@ -284,6 +284,22 @@ static void queue_peer_request(struct request *req)<br>

        queue_work(sys->io_wqueue, &req->work);<br>

 }<br>

<br>

+/*<br>

+ * We make sure we write the exact number of copies to honor the promise of the<br>

+ * redundancy for strict mode. This means that after writing of targeted data,<br>

+ * they are redundant as promised and can withstand the random node failures.<br>

+ *<br>

+ * For example, with a 4:2 policy, we need at least write to 6 nodes with data<br>

+ * strip and parity strips. For non-strict mode, we allow to write successfully<br>

+ * only if the data are written fully with 4 nodes alive.<br>

+ */<br>

+static bool has_enough_zones(struct request *req)<br>

+{<br>

+       uint64_t oid = req->rq.obj.oid;<br>

+<br>

+       return req->vinfo->nr_zones >= get_vdi_copy_number(oid_to_vid(oid));<br>

+}<br>

+<br>

 static void queue_gateway_request(struct request *req)<br>

 {<br>

        struct sd_req *hdr = &req->rq;<br>

@@ -310,13 +326,25 @@ static void queue_gateway_request(struct request *req)<br>

 queue_work:<br>

        if (RB_EMPTY_ROOT(&req->vinfo->vroot)) {<br>

                sd_err("there is no living nodes");<br>

-               req->rp.result = SD_RES_HALT;<br>

-               put_request(req);<br>

-               return;<br>

+               goto end_request;<br>

+       }<br>

+       if (sys->cinfo.flags & SD_CLUSTER_FLAG_STRICT &&<br>

+           hdr->flags & SD_FLAG_CMD_WRITE &&<br>

+           !(hdr->flags & SD_FLAG_CMD_RECOVERY) &&<br>

+           !has_enough_zones(req)) {<br>

+               sd_err("not enough zones available");<br>

+               goto end_request;<br>

        }<br>

+<br>

        req->work.fn = do_process_work;<br>

        req->work.done = gateway_op_done;<br>

        queue_work(sys->gateway_wqueue, &req->work);<br>

+       return;<br>

+<br>

+end_request:<br>

+       req->rp.result = SD_RES_HALT;<br>

+       put_request(req);<br>

+       return;<br>

 }<br>

<br>

 static void queue_local_request(struct request *req)<br>

<span class="HOEnZb"><font color="#888888">--<br>

1.7.9.5<br>

<br>

--<br>

sheepdog mailing list<br>

<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>

<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>

</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regards<br>Robin Dong

</div>