<div dir="ltr">Why not add the check in gateway_forward_request()? I think we should check the copy number before forwarding the request.</div><div class="gmail_extra"><br><br><div class="gmail_quote">2013/12/10 Liu Yuan <span dir="ltr"><<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">We make sure we write the exact number of copies to honor the promise of the<br>
redundancy for "strict mode". This means that once the targeted data is written,<br>
it is redundant as promised and can withstand random node failures.<br>
<br>
For example, with a 4:2 policy, we need to write to at least 6 nodes with data<br>
strips and parity strips. For non-strict mode, we allow a write to succeed<br>
as long as the data is fully written with 4 nodes alive.<br>
<br>
Signed-off-by: Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
---<br>
 dog/cluster.c            |   10 +++++++++-<br>
 include/internal_proto.h |    2 ++<br>
 include/sheep.h          |    3 ++-<br>
 include/sheepdog_proto.h |    2 +-<br>
 sheep/ops.c              |    2 +-<br>
 sheep/request.c          |   34 +++++++++++++++++++++++++++++++---<br>
 6 files changed, 46 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/dog/cluster.c b/dog/cluster.c<br>
index 611c91d..43df232 100644<br>
--- a/dog/cluster.c<br>
+++ b/dog/cluster.c<br>
@@ -21,6 +21,8 @@ static struct sd_option cluster_options[] = {<br>
        {'b', "store", true, "specify backend store"},<br>
        {'c', "copies", true, "specify the default data redundancy (number of copies)"},<br>
        {'f', "force", false, "do not prompt for confirmation"},<br>
+       {'t', "strict", false,<br>
+        "do not serve write request if number of nodes is not sufficient"},<br>
        {'s', "backend", false, "show backend store information"},<br>
        { 0, NULL, false, NULL },<br>
 };<br>
@@ -30,6 +32,7 @@ static struct cluster_cmd_data {<br>
        uint8_t copy_policy;<br>
        bool force;<br>
        bool show_store;<br>
+       bool strict;<br>
        char name[STORE_LEN];<br>
 } cluster_cmd_data;<br>
<br>
@@ -117,6 +120,8 @@ static int cluster_format(int argc, char **argv)<br>
                pstrcpy(store_name, STORE_LEN, DEFAULT_STORE);<br>
        hdr.data_length = strlen(store_name) + 1;<br>
        hdr.flags |= SD_FLAG_CMD_WRITE;<br>
+       if (cluster_cmd_data.strict)<br>
+               hdr.cluster.flags |= SD_CLUSTER_FLAG_STRICT;<br>
<br>
        printf("using backend %s store\n", store_name);<br>
        ret = dog_exec_req(&sd_nid, &hdr, store_name);<br>
@@ -552,7 +557,7 @@ static int cluster_check(int argc, char **argv)<br>
 static struct subcommand cluster_cmd[] = {<br>
        {"info", NULL, "aprhs", "show cluster information",<br>
         NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},<br>
-       {"format", NULL, "bcaph", "create a Sheepdog store",<br>
+       {"format", NULL, "bctaph", "create a Sheepdog store",<br>
         NULL, CMD_NEED_NODELIST, cluster_format, cluster_options},<br>
        {"shutdown", NULL, "aph", "stop Sheepdog",<br>
         NULL, 0, cluster_shutdown, cluster_options},<br>
@@ -597,6 +602,9 @@ static int cluster_parser(int ch, const char *opt)<br>
        case 's':<br>
                cluster_cmd_data.show_store = true;<br>
                break;<br>
+       case 't':<br>
+               cluster_cmd_data.strict = true;<br>
+               break;<br>
        }<br>
<br>
        return 0;<br>
diff --git a/include/internal_proto.h b/include/internal_proto.h<br>
index b224c49..ac4e3f8 100644<br>
--- a/include/internal_proto.h<br>
+++ b/include/internal_proto.h<br>
@@ -126,6 +126,8 @@<br>
 #define SD_RES_CLUSTER_ERROR    0x91 /* Cluster driver error */<br>
 #define SD_RES_OBJ_TAKEN        0x92 /* Object ID is taken up */<br>
<br>
+#define SD_CLUSTER_FLAG_STRICT  0x0001 /* Strict mode for write */<br>
+<br>
 enum sd_status {<br>
        SD_STATUS_OK = 1,<br>
        SD_STATUS_WAIT,<br>
diff --git a/include/sheep.h b/include/sheep.h<br>
index 293e057..d460d54 100644<br>
--- a/include/sheep.h<br>
+++ b/include/sheep.h<br>
@@ -160,7 +160,8 @@ static inline const char *sd_strerror(int err)<br>
                [SD_RES_WAIT_FOR_FORMAT] = "Waiting for cluster to be formatted",<br>
                [SD_RES_WAIT_FOR_JOIN] = "Waiting for other nodes to join cluster",<br>
                [SD_RES_JOIN_FAILED] = "Node has failed to join cluster",<br>
-               [SD_RES_HALT] = "IO has halted as there are no living nodes",<br>
+               [SD_RES_HALT] =<br>
+                       "IO has halted as there are not enough living nodes",<br>
                [SD_RES_READONLY] = "Object is read-only",<br>
<br>
                /* from internal_proto.h */<br>
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h<br>
index cb47e3f..366499e 100644<br>
--- a/include/sheepdog_proto.h<br>
+++ b/include/sheepdog_proto.h<br>
@@ -156,7 +156,7 @@ struct sd_req {<br>
                        uint64_t        ctime;<br>
                        uint8_t         copies;<br>
                        uint8_t         copy_policy;<br>
-                       uint8_t         reserved[2];<br>
+                       uint16_t        flags;<br>
                        uint32_t        tag;<br>
                } cluster;<br>
                struct {<br>
diff --git a/sheep/ops.c b/sheep/ops.c<br>
index 75a2565..1e9bc1e 100644<br>
--- a/sheep/ops.c<br>
+++ b/sheep/ops.c<br>
@@ -271,7 +271,7 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,<br>
<br>
        sys->cinfo.nr_copies = req->cluster.copies;<br>
        sys->cinfo.copy_policy = req->cluster.copy_policy;<br>
-       sys->cinfo.flags = req->flags;<br>
+       sys->cinfo.flags = req->cluster.flags;<br>
        if (!sys->cinfo.nr_copies)<br>
                sys->cinfo.nr_copies = SD_DEFAULT_COPIES;<br>
        sys->cinfo.ctime = req->cluster.ctime;<br>
diff --git a/sheep/request.c b/sheep/request.c<br>
index 5113fca..fd54253 100644<br>
--- a/sheep/request.c<br>
+++ b/sheep/request.c<br>
@@ -284,6 +284,22 @@ static void queue_peer_request(struct request *req)<br>
        queue_work(sys->io_wqueue, &req->work);<br>
 }<br>
<br>
+/*<br>
+ * We make sure we write the exact number of copies to honor the promise of the<br>
+ * redundancy for strict mode. This means that after writing of targeted data,<br>
+ * they are redundant as promised and can withstand the random node failures.<br>
+ *<br>
+ * For example, with a 4:2 policy, we need at least write to 6 nodes with data<br>
+ * strip and parity strips. For non-strict mode, we allow to write successfully<br>
+ * only if the data are written fully with 4 nodes alive.<br>
+ */<br>
+static bool has_enough_zones(struct request *req)<br>
+{<br>
+       uint64_t oid = req->rq.obj.oid;<br>
+<br>
+       return req->vinfo->nr_zones >= get_vdi_copy_number(oid_to_vid(oid));<br>
+}<br>
+<br>
 static void queue_gateway_request(struct request *req)<br>
 {<br>
        struct sd_req *hdr = &req->rq;<br>
@@ -310,13 +326,25 @@ static void queue_gateway_request(struct request *req)<br>
 queue_work:<br>
        if (RB_EMPTY_ROOT(&req->vinfo->vroot)) {<br>
                sd_err("there is no living nodes");<br>
-               req->rp.result = SD_RES_HALT;<br>
-               put_request(req);<br>
-               return;<br>
+               goto end_request;<br>
+       }<br>
+       if (sys->cinfo.flags & SD_CLUSTER_FLAG_STRICT &&<br>
+           hdr->flags & SD_FLAG_CMD_WRITE &&<br>
+           !(hdr->flags & SD_FLAG_CMD_RECOVERY) &&<br>
+           !has_enough_zones(req)) {<br>
+               sd_err("not enough zones available");<br>
+               goto end_request;<br>
        }<br>
+<br>
        req->work.fn = do_process_work;<br>
        req->work.done = gateway_op_done;<br>
        queue_work(sys->gateway_wqueue, &req->work);<br>
+       return;<br>
+<br>
+end_request:<br>
+       req->rp.result = SD_RES_HALT;<br>
+       put_request(req);<br>
+       return;<br>
 }<br>
<br>
 static void queue_local_request(struct request *req)<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.9.5<br>
<br>
--<br>
sheepdog mailing list<br>
<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>
<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regards<br>Robin Dong
</div>