On 05/10/2012 07:17 AM, Shevek wrote: > Better defensive programming against invalid join messages. > > We have a cluster which got into a state where the protocol mismatched, > and the malloc metadata in the heap got corrupted. This patch would > detect the potential heap corruption and make a version mismatch > more easily detectable. It is also good defensive programming > against invalid join messages. > > Signed-off-by: Shevek <shevek at anarres.org> > > --- > sheep/cluster.h | 2 +- > sheep/cluster/accord.c | 2 +- > sheep/cluster/corosync.c | 2 +- > sheep/cluster/local.c | 2 +- > sheep/cluster/zookeeper.c | 2 +- > sheep/group.c | 24 +++++++++++++++++------- > 6 files changed, 22 insertions(+), 12 deletions(-) > > diff --git a/sheep/cluster.h b/sheep/cluster.h > index d543e99..759d0eb 100644 > --- a/sheep/cluster.h > +++ b/sheep/cluster.h > @@ -190,6 +190,6 @@ void sd_leave_handler(struct sd_node *left, struct > sd_node *members, > size_t nr_members); > void sd_notify_handler(struct sd_node *sender, void *msg, size_t > msg_len); > enum cluster_join_result sd_check_join_cb(struct sd_node *joining, > - void *opaque); > + void *opaque, size_t opaque_len); > > #endif > diff --git a/sheep/cluster/accord.c b/sheep/cluster/accord.c > index 1fdca91..dad1c2f 100644 > --- a/sheep/cluster/accord.c > +++ b/sheep/cluster/accord.c > @@ -576,7 +576,7 @@ static int accord_dispatch(void) > case EVENT_JOIN: > if (ev.blocked) { > if (node_cmp(&ev.nodes[0], &this_node) == 0) { > - res = sd_check_join_cb(&ev.sender, ev.buf); > + res = sd_check_join_cb(&ev.sender, ev.buf, ev.buf_len); > ev.join_result = res; > ev.blocked = 0; > > diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c > index 4a588e9..217d060 100644 > --- a/sheep/cluster/corosync.c > +++ b/sheep/cluster/corosync.c > @@ -296,7 +296,7 @@ static int __corosync_dispatch_one(struct > corosync_event *cevent) > return 0; > > res = sd_check_join_cb(&cevent->sender.ent, > - cevent->msg); > + cevent->msg, cevent->msg_len); > if (res == CJ_RES_MASTER_TRANSFER) > nr_cpg_nodes = 0; > > diff --git a/sheep/cluster/local.c b/sheep/cluster/local.c > index fd84615..3d75e68 100644 > --- a/sheep/cluster/local.c > +++ b/sheep/cluster/local.c > @@ -404,7 +404,7 @@ static int local_dispatch(void) > case EVENT_JOIN: > if (ev->blocked) { > if (node_cmp(&ev->nodes[0], &this_node) == 0) { > - res = sd_check_join_cb(&ev->sender, ev->buf); > + res = sd_check_join_cb(&ev->sender, ev->buf, ev->buf_len); > ev->join_result = res; > ev->blocked = 0; > msync(ev, sizeof(*ev), MS_SYNC); > diff --git a/sheep/cluster/zookeeper.c b/sheep/cluster/zookeeper.c > index 491056a..335c261 100644 > --- a/sheep/cluster/zookeeper.c > +++ b/sheep/cluster/zookeeper.c > @@ -777,7 +777,7 @@ static int zk_dispatch(void) > dprintf("one sheep joined[up], nr_nodes:%ld, sender:%s, joined:%d > \n", > nr_zk_nodes, node_to_str(&ev.sender.node), ev.sender.joined); > if (is_master(zhandle, &this_node)) { > - res = sd_check_join_cb(&ev.sender.node, ev.buf); > + res = sd_check_join_cb(&ev.sender.node, ev.buf, ev.buf_len); > ev.join_result = res; > ev.blocked = 0; > ev.sender.joined = 1; > diff --git a/sheep/group.c b/sheep/group.c > index c7fd387..1017437 100644 > --- a/sheep/group.c > +++ b/sheep/group.c > @@ -449,12 +449,6 @@ out: > > static void join(struct sd_node *joining, struct join_message *msg) > { > - if (msg->proto_ver != SD_SHEEP_PROTO_VER) { > - eprintf("joining node sent a message with the wrong protocol version > \n"); > - msg->result = SD_RES_VER_MISMATCH; > - return; > - } > - > msg->result = get_cluster_status(joining, msg->nodes, msg->nr_nodes, > msg->ctime, msg->epoch, > &msg->cluster_status, &msg->inc_epoch); > @@ -735,11 +729,27 @@ static void __sd_leave(struct event_struct > *cevent) > } > } > > -enum cluster_join_result sd_check_join_cb(struct sd_node *joining, void > *opaque) > +enum cluster_join_result sd_check_join_cb(struct sd_node *joining, void > *opaque, size_t opaque_len) > { > struct join_message *jm = opaque; > struct node *node; > > + if (opaque_len < sizeof(*jm)) { > + eprintf("joining node sent a message which is too short\n"); > + // msg->result = SD_RES_VER_MISMATCH; // This may segfault anyway if > msg_len == 0 > + return CJ_RES_FAIL; > + } > + if (opaque_len != get_join_message_size(jm)) { > + eprintf("joining node sent a message size %zu, expected %zu\n", > opaque_len, get_join_message_size(jm)); > + jm->result = SD_RES_VER_MISMATCH; > + return CJ_RES_FAIL; > + } > + if (jm->proto_ver != SD_SHEEP_PROTO_VER) { > + eprintf("joining node sent a message with the wrong protocol version > \n"); > + jm->result = SD_RES_VER_MISMATCH; > + return CJ_RES_FAIL; > + } > + > if (node_cmp(joining, &sys->this_node) == 0) { > struct sd_node entries[SD_MAX_NODES]; > int nr_entries; Style problems too :) Thanks, Yuan |