[Sheepdog] [PATCH v2 2/7] sheep: remove notification message type

MORITA Kazutaka morita.kazutaka at lab.ntt.co.jp
Thu Oct 20 09:24:27 CEST 2011


We no longer use cdrv->notify() for node membership management, so
notification messages no longer need to carry a message type.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 sheep/group.c      |  301 ++++++----------------------------------------------
 sheep/sheep_priv.h |    6 -
 2 files changed, 34 insertions(+), 273 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index f74db62..7315fce 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -37,9 +37,8 @@ enum deliver_msg_state {
 
 struct message_header {
 	uint8_t proto_ver;
-	uint8_t pad;
-	uint8_t op;
 	uint8_t state;
+	uint8_t pad[2];
 	uint32_t msg_length;
 	struct sheepdog_node_list_entry from;
 };
@@ -64,11 +63,6 @@ struct join_message {
 	} leave_nodes[SD_MAX_NODES];
 };
 
-struct leave_message {
-	struct message_header header;
-	uint32_t epoch;
-};
-
 struct vdi_op_message {
 	struct message_header header;
 	struct sd_vdi_req req;
@@ -76,11 +70,6 @@ struct vdi_op_message {
 	uint8_t data[0];
 };
 
-struct mastership_tx_message {
-	struct message_header header;
-	uint32_t epoch;
-};
-
 struct work_notify {
 	struct cpg_event cev;
 
@@ -141,31 +130,6 @@ static void cpg_event_set_##name(void)					\
 CPG_EVENT_WORK_FNS(RUNNING, running)
 CPG_EVENT_WORK_FNS(JOINING, joining)
 
-static inline int join_message(struct message_header *m)
-{
-	return m->op == SD_MSG_JOIN;
-}
-
-static inline int vdi_op_message(struct message_header *m)
-{
-	return m->op == SD_MSG_VDI_OP;
-}
-
-static inline int master_chg_message(struct message_header *m)
-{
-	return m->op == SD_MSG_MASTER_CHANGED;
-}
-
-static inline int leave_message(struct message_header *m)
-{
-	return m->op == SD_MSG_LEAVE;
-}
-
-static inline int master_tx_message(struct message_header *m)
-{
-	return m->op == SD_MSG_MASTER_TRANSFER;
-}
-
 static int get_node_idx(struct sheepdog_node_list_entry *ent,
 			struct sheepdog_node_list_entry *entries, int nr_nodes)
 {
@@ -366,7 +330,6 @@ forward:
 		return;
 	}
 
-	msg->header.op = SD_MSG_VDI_OP;
 	msg->header.state = DM_FIN;
 	msg->header.msg_length = sizeof(*msg) + hdr->data_length;
 	msg->header.from = sys->this_node;
@@ -413,19 +376,6 @@ static struct node *find_node(struct list_head *node_list,
 	return NULL;
 }
 
-static int is_master(void)
-{
-	struct node *node;
-
-	if (!sys->join_finished)
-		return 0;
-
-	node = list_first_entry(&sys->sd_node_list, struct node, list);
-	if (is_myself(node->ent.addr, node->ent.port))
-		return 1;
-	return 0;
-}
-
 static inline int get_nodes_nr_from(struct list_head *l)
 {
 	struct node *node;
@@ -473,68 +423,6 @@ static struct sheepdog_node_list_entry *find_entry_epoch(struct sheepdog_node_li
 	return NULL;
 }
 
-static int add_node_to_leave_list(struct message_header *msg)
-{
-	int ret = SD_RES_SUCCESS;
-	int nr, i, le = get_latest_epoch();
-	LIST_HEAD(tmp_list);
-	struct node *n, *t;
-	struct join_message *jm;
-
-	if (leave_message(msg)) {
-		n = zalloc(sizeof(*n));
-		if (!n) {
-			ret = SD_RES_NO_MEM;
-			goto err;
-		}
-
-		if (find_entry_list(&msg->from, &sys->leave_list)
-		    || !find_entry_epoch(&msg->from, le)) {
-			free(n);
-			goto ret;
-		}
-
-		n->ent = msg->from;
-
-		list_add_tail(&n->list, &sys->leave_list);
-		goto ret;
-	} else if (join_message(msg)) {
-		jm = (struct join_message *)msg;
-		nr = jm->nr_leave_nodes;
-		for (i = 0; i < nr; i++) {
-			n = zalloc(sizeof(*n));
-			if (!n) {
-				ret = SD_RES_NO_MEM;
-				goto free;
-			}
-
-			if (find_entry_list(&jm->leave_nodes[i].ent, &sys->leave_list)
-			    || !find_entry_epoch(&jm->leave_nodes[i].ent, le)) {
-				free(n);
-				continue;
-			}
-
-			n->ent = jm->leave_nodes[i].ent;
-
-			list_add_tail(&n->list, &tmp_list);
-		}
-		list_splice_init(&tmp_list, &sys->leave_list);
-		goto ret;
-	} else {
-		ret = SD_RES_INVALID_PARMS;
-		goto err;
-	}
-free:
-	list_for_each_entry_safe(n, t, &tmp_list, list) {
-		free(n);
-	}
-ret:
-	dprintf("%d\n", get_nodes_nr_from(&sys->leave_list));
-	print_node_list(&sys->leave_list);
-err:
-	return ret;
-}
-
 static int cluster_sanity_check(struct sheepdog_node_list_entry *entries,
 			     int nr_entries, uint64_t ctime, uint32_t epoch)
 {
@@ -778,8 +666,9 @@ static void update_cluster_info(struct join_message *msg,
 				struct sheepdog_node_list_entry *nodes,
 				size_t nr_nodes)
 {
-	int i;
-	int ret;
+	int i, le;
+	int ret, nr_leave_nodes;
+	struct node *n;
 
 	eprintf("status = %d, epoch = %d, %x, %d\n", msg->cluster_status, msg->epoch, msg->result, sys->join_finished);
 
@@ -806,8 +695,25 @@ static void update_cluster_info(struct join_message *msg,
 				node_to_str(&nodes[i]));
 	}
 
-	if (msg->cluster_status == SD_STATUS_WAIT_FOR_JOIN)
-		add_node_to_leave_list((struct message_header *)msg);
+	if (msg->cluster_status != SD_STATUS_OK) {
+		nr_leave_nodes = msg->nr_leave_nodes;
+		le = get_latest_epoch();
+		for (i = 0; i < nr_leave_nodes; i++) {
+			n = zalloc(sizeof(*n));
+			if (!n)
+				panic("oom\n");
+
+			if (find_entry_list(&msg->leave_nodes[i].ent, &sys->leave_list)
+			    || !find_entry_epoch(&msg->leave_nodes[i].ent, le)) {
+				free(n);
+				continue;
+			}
+
+			n->ent = msg->leave_nodes[i].ent;
+
+			list_add_tail(&n->list, &sys->leave_list);
+		}
+	}
 
 	sys->join_finished = 1;
 
@@ -922,8 +828,14 @@ static void vdi_op(void *arg)
 	rsp->result = ret;
 }
 
-static void vdi_op_done(struct vdi_op_message *msg)
+static void __sd_notify(struct cpg_event *cevent)
+{
+}
+
+static void __sd_notify_done(struct cpg_event *cevent)
 {
+	struct work_notify *w = container_of(cevent, struct work_notify, cev);
+	struct vdi_op_message *msg = (struct vdi_op_message *)w->msg;
 	const struct sd_vdi_req *hdr = &msg->req;
 	struct sd_vdi_rsp *rsp = &msg->rsp;
 	void *data = msg->data;
@@ -1010,136 +922,6 @@ out:
 	req->done(req);
 }
 
-static void __sd_notify(struct cpg_event *cevent)
-{
-	struct work_notify *w = container_of(cevent, struct work_notify, cev);
-	struct message_header *m = w->msg;
-	char name[128];
-	struct node *node;
-
-	dprintf("op: %d, state: %u, size: %d, from: %s, port: %d\n",
-		m->op, m->state, m->msg_length,
-		addr_to_str(name, sizeof(name), m->from.addr, m->from.port),
-		m->from.port);
-
-	/*
-	 * we don't want to perform any deliver events except mastership_tx event
-	 * until we join; we wait for our JOIN message.
-	 */
-	if (!sys->join_finished && !master_tx_message(m)) {
-		if (node_cmp(&m->from, &sys->this_node) != 0) {
-			cevent->skip = 1;
-			return;
-		}
-	}
-
-	if (join_message(m)) {
-		node = find_node(&sys->cpg_node_list, &m->from);
-		if (!node) {
-			dprintf("the node was left before join operation is finished\n");
-			return;
-		}
-
-		node->ent = m->from;
-	}
-}
-
-static void __sd_notify_done(struct cpg_event *cevent)
-{
-	struct work_notify *w = container_of(cevent, struct work_notify, cev);
-	struct message_header *m;
-	char name[128];
-	int do_recovery;
-	struct node *node, *t;
-	int nr, nr_local, nr_leave;
-
-	m = w->msg;
-
-	if (m->state == DM_FIN) {
-		switch (m->op) {
-		case SD_MSG_JOIN:
-			break;
-		case SD_MSG_LEAVE:
-			node = find_node(&sys->sd_node_list, &m->from);
-			if (node) {
-				sys->nr_vnodes = 0;
-
-				list_del(&node->list);
-				free(node);
-				if (sys->status == SD_STATUS_OK) {
-					sys->epoch++;
-					update_epoch_log(sys->epoch);
-					update_epoch_store(sys->epoch);
-				}
-			}
-		/* fall through */
-		case SD_MSG_MASTER_TRANSFER:
-			if (sys->status == SD_STATUS_WAIT_FOR_JOIN) {
-				add_node_to_leave_list(m);
-
-				/* Sheep needs this to identify itself as master.
-				 * Now mastership transfer is done.
-				 */
-				if (!sys->join_finished) {
-					sys->join_finished = 1;
-					move_node_to_sd_list(sys->this_node);
-					sys->epoch = get_latest_epoch();
-				}
-
-				nr_local = get_nodes_nr_epoch(sys->epoch);
-				nr = get_nodes_nr_from(&sys->sd_node_list);
-				nr_leave = get_nodes_nr_from(&sys->leave_list);
-
-				dprintf("%d == %d + %d \n", nr_local, nr, nr_leave);
-				if (nr_local == nr + nr_leave) {
-					sys->status = SD_STATUS_OK;
-					update_epoch_log(sys->epoch);
-					update_epoch_store(sys->epoch);
-				}
-			}
-			break;
-		case SD_MSG_VDI_OP:
-			break;
-		default:
-			eprintf("unknown message %d\n", m->op);
-			break;
-		}
-	}
-
-	do_recovery = (m->state == DM_FIN &&
-		       (join_message(m) || leave_message(m)));
-
-	dprintf("op: %d, state: %u, size: %d, from: %s\n",
-		m->op, m->state, m->msg_length,
-		addr_to_str(name, sizeof(name), m->from.addr,
-			    m->from.port));
-
-	if (m->state == DM_INIT && is_master()) {
-		switch (m->op) {
-		case SD_MSG_JOIN:
-			break;
-		default:
-			eprintf("unknown message %d\n", m->op);
-			break;
-		}
-	}
-
-	if (do_recovery &&
-	    (sys->status == SD_STATUS_OK || sys->status == SD_STATUS_HALT)) {
-		list_for_each_entry_safe(node, t, &sys->leave_list, list) {
-			list_del(&node->list);
-		}
-		start_recovery(sys->epoch);
-	}
-
-	if (sys->status == SD_STATUS_HALT) {
-		int nr_zones = get_zones_nr_from(&sys->sd_node_list);
-
-		if (nr_zones >= sys->nr_sobjs)
-			sys->status = SD_STATUS_OK;
-	}
-}
-
 static void sd_notify_handler(struct sheepdog_node_list_entry *sender,
 			      void *msg, size_t msg_len)
 {
@@ -1148,8 +930,8 @@ static void sd_notify_handler(struct sheepdog_node_list_entry *sender,
 	struct message_header *m = msg;
 	char name[128];
 
-	dprintf("op: %d, state: %u, size: %d, from: %s, pid: %u\n",
-		m->op, m->state, m->msg_length,
+	dprintf("state: %u, size: %d, from: %s, pid: %u\n",
+		m->state, m->msg_length,
 		addr_to_str(name, sizeof(name), m->from.addr, m->from.port),
 		sender->port);
 
@@ -1368,7 +1150,6 @@ static int send_join_request(struct sheepdog_node_list_entry *ent)
 
 	memset(&msg, 0, sizeof(msg));
 	msg.header.proto_ver = SD_SHEEP_PROTO_VER;
-	msg.header.op = SD_MSG_JOIN;
 	msg.header.state = DM_INIT;
 	msg.header.msg_length = sizeof(msg);
 	msg.header.from = *ent;
@@ -1549,10 +1330,7 @@ static void cpg_event_done(struct work *work, int idx)
 		if (sys->join_finished && w->msg->state == DM_INIT)
 			cpg_event_set_joining();
 
-		if (vdi_op_message(w->msg))
-			vdi_op_done((struct vdi_op_message *)w->msg);
-		else
-			__sd_notify_done(cevent);
+		__sd_notify_done(cevent);
 		break;
 	}
 	case CPG_EVENT_REQUEST:
@@ -2032,16 +1810,5 @@ int create_cluster(int port, int64_t zone)
 /* after this function is called, this node only works as a gateway */
 int leave_cluster(void)
 {
-	struct leave_message msg;
-
-	memset(&msg, 0, sizeof(msg));
-	msg.header.proto_ver = SD_SHEEP_PROTO_VER;
-	msg.header.op = SD_MSG_LEAVE;
-	msg.header.state = DM_FIN;
-	msg.header.msg_length = sizeof(msg);
-	msg.header.from = sys->this_node;
-	msg.epoch = get_latest_epoch();
-
-	dprintf("%d\n", msg.epoch);
-	return sys->cdrv->notify(&msg, msg.header.msg_length, NULL);
+	return sys->cdrv->leave();
 }
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 798aee0..17872f9 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -26,12 +26,6 @@
 #define SD_OP_GET_OBJ_LIST   0xA1
 #define SD_OP_GET_EPOCH      0XA2
 
-#define SD_MSG_JOIN             0x01
-#define SD_MSG_VDI_OP           0x02
-#define SD_MSG_MASTER_CHANGED   0x03
-#define SD_MSG_LEAVE            0x04
-#define SD_MSG_MASTER_TRANSFER  0x05
-
 #define SD_STATUS_OK                0x00
 #define SD_STATUS_WAIT_FOR_FORMAT   0x01
 #define SD_STATUS_WAIT_FOR_JOIN     0x02
-- 
1.7.2.5




More information about the sheepdog mailing list