At Sun, 16 Oct 2011 18:35:15 +0800, Liu Yuan wrote: > > From: Liu Yuan <tailai.ly at taobao.com> > > We use SD_STATUS_HALT to identify the cluster state when it should not serve > IO requests. > > This is optional, users might risk themselves to turn off this HALT status. As > the below command: > > $ collie cluster format -H > or > $ collie cluster format --nohalt > > By default, this is enabled. > > [Test Case] > > [1] > steps: > > for i in 0 1 2 3; do ./sheep/sheep -d /store/$i -z $i -p 700$i; sleep 1; done > ./collie/collie cluster format --copies=3; > for i in 0 1; do pkill -f "sheep -d /store/$i"; sleep 1; done > for i in 2 3; do ./collie/collie cluster info -p 700$i; done > for i in 0 1; do ./sheep/sheep -d /store/$i -z $i -p 700$i; sleep 1; done > for i in 0 1 2 3; do ./collie/collie cluster info -p 700$i; done > > output: > > Cluster status: The sheepdog is stopped doing IO, short of living nodes > > Creation time Epoch Nodes > 2011-10-11 16:26:02 3 [192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 2 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > Cluster status: The sheepdog is stopped doing IO, short of living nodes > > Creation time Epoch Nodes > 2011-10-11 16:26:02 3 [192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 2 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > Cluster status: running > > Creation time Epoch Nodes > 2011-10-11 16:26:02 5 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 4 [192.168.0.1:7000, 192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 3 [192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 2 [192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > 2011-10-11 16:26:02 1 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002, 192.168.0.1:7003] > > 
... > > [2] > steps: > for i in 0 1; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done > collie/collie cluster format > for i in 0 1; do collie/collie cluster info -p 700$i;done > for i in 0; do pkill -f "sheep/sheep -d /store/$i"; sleep 1; done > for i in 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done > for i in 1 2; do pkill -f "sheep/sheep -d /store/$i"; sleep 1; done > for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done > for i in 0 1 2; do sheep/sheep -d /store/$i -z $i -p 700$i;sleep 1;done > for i in 0 1 2; do collie/collie cluster info -p 700$i;done > > output: > Cluster status: The sheepdog is stopped doing IO, short of living nodes > > Creation time Epoch Nodes > 2011-10-16 18:11:07 1 [192.168.0.1:7000, 192.168.0.1:7001] > Cluster status: The sheepdog is stopped doing IO, short of living nodes > > Creation time Epoch Nodes > 2011-10-16 18:11:07 1 [192.168.0.1:7000, 192.168.0.1:7001] > Cluster status: running > > Creation time Epoch Nodes > 2011-10-16 18:11:07 6 [192.168.0.1:7000, 192.168.0.1:7001, 192.168.0.1:7002] > 2011-10-16 18:11:07 5 [192.168.0.1:7000, 192.168.0.1:7002] > 2011-10-16 18:11:07 4 [192.168.0.1:7002] > 2011-10-16 18:11:07 3 [192.168.0.1:7001, 192.168.0.1:7002] > 2011-10-16 18:11:07 2 [192.168.0.1:7001] > 2011-10-16 18:11:07 1 [192.168.0.1:7000, 192.168.0.1:7001] > > ... 
> > Signed-off-by: Liu Yuan <tailai.ly at taobao.com> > --- > collie/cluster.c | 14 +++++++++++++- > collie/collie.c | 1 + > sheep/group.c | 30 +++++++++++++++++++++++++++++- > sheep/sheep_priv.h | 2 ++ > 4 files changed, 45 insertions(+), 2 deletions(-) > > diff --git a/collie/cluster.c b/collie/cluster.c > index 0d5dfbe..3b16308 100644 > --- a/collie/cluster.c > +++ b/collie/cluster.c > @@ -16,8 +16,15 @@ > > struct cluster_cmd_data { > int copies; > + int nohalt; > } cluster_cmd_data; > > +static void set_nohalt(uint32_t *p) > +{ > + if (p) > + *p |= 1 << 31; > +} > + > static int cluster_format(int argc, char **argv) > { > int fd, ret; > @@ -36,6 +43,8 @@ static int cluster_format(int argc, char **argv) > > hdr.opcode = SD_OP_MAKE_FS; > hdr.copies = cluster_cmd_data.copies; > + if (cluster_cmd_data.nohalt) > + set_nohalt(&hdr.copies); I think we should use hdr.flags to set the nohalt option. > hdr.epoch = node_list_version; > hdr.ctime = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000; > > @@ -163,7 +172,7 @@ static int cluster_shutdown(int argc, char **argv) > static struct subcommand cluster_cmd[] = { > {"info", NULL, "aprh", "show cluster information", > 0, cluster_info}, > - {"format", NULL, "caph", "create a Sheepdog storage", > + {"format", NULL, "cHaph", "create a Sheepdog storage", > 0, cluster_format}, > {"shutdown", NULL, "aph", "stop Sheepdog", > SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown}, > @@ -176,6 +185,9 @@ static int cluster_parser(int ch, char *opt) > case 'c': > cluster_cmd_data.copies = atoi(opt); > break; > + case 'H': > + cluster_cmd_data.nohalt = 1; > + break; > } > > return 0; > diff --git a/collie/collie.c b/collie/collie.c > index e064a0a..df5dca4 100644 > --- a/collie/collie.c > +++ b/collie/collie.c > @@ -41,6 +41,7 @@ static const struct sd_option collie_options[] = { > > /* cluster options */ > {'c', "copies", 1, "set the number of data redundancy"}, > + {'H', "nohalt", 0, "serve the IO rquests even lack of enough redundant 
nodes"}, > > { 0, NULL, 0, NULL }, > }; > diff --git a/sheep/group.c b/sheep/group.c > index 5d06745..103a647 100644 > --- a/sheep/group.c > +++ b/sheep/group.c > @@ -983,7 +983,16 @@ static void vdi_op_done(struct vdi_op_message *msg) > > set_global_nr_copies(sys->nr_sobjs); > > - sys->status = SD_STATUS_OK; > + if (sys_nohalt()) > + sys->status = SD_STATUS_OK; > + else { > + int nr_zones = get_zones_nr_from(&sys->sd_node_list); > + > + if (nr_zones >= sys->nr_sobjs) > + sys->status = SD_STATUS_OK; > + else > + sys->status = SD_STATUS_HALT; > + } > break; > case SD_OP_SHUTDOWN: > sys->status = SD_STATUS_SHUTDOWN; > @@ -1210,6 +1219,13 @@ static void __sd_notify_done(struct cpg_event *cevent) > } > start_recovery(sys->epoch); > } > + > + if (sys->status == SD_STATUS_HALT) { > + int nr_zones = get_zones_nr_from(&sys->sd_node_list); > + > + if (nr_zones >= sys->nr_sobjs) > + sys->status = SD_STATUS_OK; > + } > } > > static void sd_notify_handler(struct sheepid *sender, void *msg, size_t msg_len) > @@ -1438,6 +1454,11 @@ static void __sd_join_done(struct cpg_event *cevent) > send_join_request(&w->joined); > } > > +int sys_nohalt() > +{ > + return sys->nr_sobjs & (1 << 31); sys->nr_sobjs is used everywhere in the main thread, so I don't think storing the nohalt flag in its top bit works at all. For example: $ sheep /store/0 $ collie cluster format -H $ qemu-img create sheepdog:test 4G Formatting 'sheepdog:test', fmt=raw size=4294967296 qemu-img: Failed to write the requested VDI, test qemu-img: sheepdog:test: error while creating raw: Input/output error Should we declare another field in struct cluster info for the nohalt option instead? Thanks, Kazutaka |