<div dir="ltr">Why not add the check in gateway_forward_request()? I think we should check the copy number before forwarding the request.</div><div class="gmail_extra"><br><br><div class="gmail_quote">2013/12/10 Liu Yuan <span dir="ltr"><<a href="mailto:namei.unix@gmail.com" target="_blank">namei.unix@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">We make sure we write the exact number of copies to honor the promise of the<br>
redundancy for "strict mode". This means that after writing of targeted data,<br>
they are redundant as promised and can withstand the random node failures.<br>
<br>
For example, with a 4:2 policy, we need at least write to 6 nodes with data<br>
strip and parity strips. For non-strict mode, we allow to write successfully<br>
only if the data are written fully with 4 nodes alive.<br>
<br>
Signed-off-by: Liu Yuan <<a href="mailto:namei.unix@gmail.com">namei.unix@gmail.com</a>><br>
---<br>
dog/cluster.c | 10 +++++++++-<br>
include/internal_proto.h | 2 ++<br>
include/sheep.h | 3 ++-<br>
include/sheepdog_proto.h | 2 +-<br>
sheep/ops.c | 2 +-<br>
sheep/request.c | 34 +++++++++++++++++++++++++++++++---<br>
6 files changed, 46 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/dog/cluster.c b/dog/cluster.c<br>
index 611c91d..43df232 100644<br>
--- a/dog/cluster.c<br>
+++ b/dog/cluster.c<br>
@@ -21,6 +21,8 @@ static struct sd_option cluster_options[] = {<br>
{'b', "store", true, "specify backend store"},<br>
{'c', "copies", true, "specify the default data redundancy (number of copies)"},<br>
{'f', "force", false, "do not prompt for confirmation"},<br>
+ {'t', "strict", false,<br>
+ "do not serve write request if number of nodes is not sufficient"},<br>
{'s', "backend", false, "show backend store information"},<br>
{ 0, NULL, false, NULL },<br>
};<br>
@@ -30,6 +32,7 @@ static struct cluster_cmd_data {<br>
uint8_t copy_policy;<br>
bool force;<br>
bool show_store;<br>
+ bool strict;<br>
char name[STORE_LEN];<br>
} cluster_cmd_data;<br>
<br>
@@ -117,6 +120,8 @@ static int cluster_format(int argc, char **argv)<br>
pstrcpy(store_name, STORE_LEN, DEFAULT_STORE);<br>
hdr.data_length = strlen(store_name) + 1;<br>
hdr.flags |= SD_FLAG_CMD_WRITE;<br>
+ if (cluster_cmd_data.strict)<br>
+ hdr.cluster.flags |= SD_CLUSTER_FLAG_STRICT;<br>
<br>
printf("using backend %s store\n", store_name);<br>
ret = dog_exec_req(&sd_nid, &hdr, store_name);<br>
@@ -552,7 +557,7 @@ static int cluster_check(int argc, char **argv)<br>
static struct subcommand cluster_cmd[] = {<br>
{"info", NULL, "aprhs", "show cluster information",<br>
NULL, CMD_NEED_NODELIST, cluster_info, cluster_options},<br>
- {"format", NULL, "bcaph", "create a Sheepdog store",<br>
+ {"format", NULL, "bctaph", "create a Sheepdog store",<br>
NULL, CMD_NEED_NODELIST, cluster_format, cluster_options},<br>
{"shutdown", NULL, "aph", "stop Sheepdog",<br>
NULL, 0, cluster_shutdown, cluster_options},<br>
@@ -597,6 +602,9 @@ static int cluster_parser(int ch, const char *opt)<br>
case 's':<br>
cluster_cmd_data.show_store = true;<br>
break;<br>
+ case 't':<br>
+ cluster_cmd_data.strict = true;<br>
+ break;<br>
}<br>
<br>
return 0;<br>
diff --git a/include/internal_proto.h b/include/internal_proto.h<br>
index b224c49..ac4e3f8 100644<br>
--- a/include/internal_proto.h<br>
+++ b/include/internal_proto.h<br>
@@ -126,6 +126,8 @@<br>
#define SD_RES_CLUSTER_ERROR 0x91 /* Cluster driver error */<br>
#define SD_RES_OBJ_TAKEN 0x92 /* Object ID is taken up */<br>
<br>
+#define SD_CLUSTER_FLAG_STRICT 0x0001 /* Strict mode for write */<br>
+<br>
enum sd_status {<br>
SD_STATUS_OK = 1,<br>
SD_STATUS_WAIT,<br>
diff --git a/include/sheep.h b/include/sheep.h<br>
index 293e057..d460d54 100644<br>
--- a/include/sheep.h<br>
+++ b/include/sheep.h<br>
@@ -160,7 +160,8 @@ static inline const char *sd_strerror(int err)<br>
[SD_RES_WAIT_FOR_FORMAT] = "Waiting for cluster to be formatted",<br>
[SD_RES_WAIT_FOR_JOIN] = "Waiting for other nodes to join cluster",<br>
[SD_RES_JOIN_FAILED] = "Node has failed to join cluster",<br>
- [SD_RES_HALT] = "IO has halted as there are no living nodes",<br>
+ [SD_RES_HALT] =<br>
+ "IO has halted as there are not enough living nodes",<br>
[SD_RES_READONLY] = "Object is read-only",<br>
<br>
/* from internal_proto.h */<br>
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h<br>
index cb47e3f..366499e 100644<br>
--- a/include/sheepdog_proto.h<br>
+++ b/include/sheepdog_proto.h<br>
@@ -156,7 +156,7 @@ struct sd_req {<br>
uint64_t ctime;<br>
uint8_t copies;<br>
uint8_t copy_policy;<br>
- uint8_t reserved[2];<br>
+ uint16_t flags;<br>
uint32_t tag;<br>
} cluster;<br>
struct {<br>
diff --git a/sheep/ops.c b/sheep/ops.c<br>
index 75a2565..1e9bc1e 100644<br>
--- a/sheep/ops.c<br>
+++ b/sheep/ops.c<br>
@@ -271,7 +271,7 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,<br>
<br>
sys->cinfo.nr_copies = req->cluster.copies;<br>
sys->cinfo.copy_policy = req->cluster.copy_policy;<br>
- sys->cinfo.flags = req->flags;<br>
+ sys->cinfo.flags = req->cluster.flags;<br>
if (!sys->cinfo.nr_copies)<br>
sys->cinfo.nr_copies = SD_DEFAULT_COPIES;<br>
sys->cinfo.ctime = req->cluster.ctime;<br>
diff --git a/sheep/request.c b/sheep/request.c<br>
index 5113fca..fd54253 100644<br>
--- a/sheep/request.c<br>
+++ b/sheep/request.c<br>
@@ -284,6 +284,22 @@ static void queue_peer_request(struct request *req)<br>
queue_work(sys->io_wqueue, &req->work);<br>
}<br>
<br>
+/*<br>
+ * We make sure we write the exact number of copies to honor the promise of the<br>
+ * redundancy for strict mode. This means that after writing of targeted data,<br>
+ * they are redundant as promised and can withstand the random node failures.<br>
+ *<br>
+ * For example, with a 4:2 policy, we need at least write to 6 nodes with data<br>
+ * strip and parity strips. For non-strict mode, we allow to write successfully<br>
+ * only if the data are written fully with 4 nodes alive.<br>
+ */<br>
+static bool has_enough_zones(struct request *req)<br>
+{<br>
+ uint64_t oid = req->rq.obj.oid;<br>
+<br>
+ return req->vinfo->nr_zones >= get_vdi_copy_number(oid_to_vid(oid));<br>
+}<br>
+<br>
static void queue_gateway_request(struct request *req)<br>
{<br>
struct sd_req *hdr = &req->rq;<br>
@@ -310,13 +326,25 @@ static void queue_gateway_request(struct request *req)<br>
queue_work:<br>
if (RB_EMPTY_ROOT(&req->vinfo->vroot)) {<br>
sd_err("there is no living nodes");<br>
- req->rp.result = SD_RES_HALT;<br>
- put_request(req);<br>
- return;<br>
+ goto end_request;<br>
+ }<br>
+ if (sys->cinfo.flags & SD_CLUSTER_FLAG_STRICT &&<br>
+ hdr->flags & SD_FLAG_CMD_WRITE &&<br>
+ !(hdr->flags & SD_FLAG_CMD_RECOVERY) &&<br>
+ !has_enough_zones(req)) {<br>
+ sd_err("not enough zones available");<br>
+ goto end_request;<br>
}<br>
+<br>
req->work.fn = do_process_work;<br>
req->work.done = gateway_op_done;<br>
queue_work(sys->gateway_wqueue, &req->work);<br>
+ return;<br>
+<br>
+end_request:<br>
+ req->rp.result = SD_RES_HALT;<br>
+ put_request(req);<br>
+ return;<br>
}<br>
<br>
static void queue_local_request(struct request *req)<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.9.5<br>
<br>
--<br>
sheepdog mailing list<br>
<a href="mailto:sheepdog@lists.wpkg.org">sheepdog@lists.wpkg.org</a><br>
<a href="http://lists.wpkg.org/mailman/listinfo/sheepdog" target="_blank">http://lists.wpkg.org/mailman/listinfo/sheepdog</a><br>
</font></span></blockquote></div><br><br clear="all"><div><br></div>-- <br>--<br>Best Regards<br>Robin Dong
</div>