[sheepdog] [PATCH v3 08/10] sheep: cleanup the global copies number

Tue Aug 7 13:20:55 CEST 2012

From: levin li <xingke.lwp at taobao.com>

Since every VDI now has its own number of copies, there is no need to
keep the global copies number any more.

Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/group.c      |   50 ++++++++------------------------------------------
 sheep/ops.c        |   18 +++---------------
 sheep/sheep_priv.h |    3 ---
 sheep/store.c      |   46 ----------------------------------------------
 4 files changed, 11 insertions(+), 106 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index b06a667..d625d78 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -84,20 +84,24 @@ static int get_zones_nr_from(struct sd_node *nodes, int nr_nodes)
 
 bool have_enough_zones(void)
 {
+	int max_copies;
+
 	if (sys->flags & SD_FLAG_NOHALT)
 		return true;
 
 	if (!current_vnode_info)
 		return false;
 
-	dprintf("flags %d, nr_zones %d, copies %d\n",
-		sys->flags, current_vnode_info->nr_zones, sys->nr_copies);
+	max_copies = get_max_copy_number();
+
+	dprintf("flags %d, nr_zones %d, min copies %d\n",
+		sys->flags, current_vnode_info->nr_zones, max_copies);
 
 	if (sys->flags & SD_FLAG_QUORUM) {
-		if (current_vnode_info->nr_zones > (sys->nr_copies/2))
+		if (current_vnode_info->nr_zones > (max_copies/2))
 			return true;
 	} else {
-		if (current_vnode_info->nr_zones >= sys->nr_copies)
+		if (current_vnode_info->nr_zones >= max_copies)
 			return true;
 	}
 	return false;
@@ -114,18 +118,6 @@ static int get_node_idx(struct vnode_info *vnode_info, struct sd_node *ent)
 }
 
 /*
- * If we have less zones available than the desired redundancy we have to do
- * with nr_zones copies, sorry.
- *
- * Note that you generally want to use get_nr_copies below, as it uses the
- * current vnode state snapshot instead of global data.
- */
-int get_max_nr_copies_from(struct sd_node *nodes, int nr_nodes)
-{
-	return min((int)sys->nr_copies, get_zones_nr_from(nodes, nr_nodes));
-}
-
-/*
  * Grab an additional reference to the passed in vnode info.
  *
  * The caller must already hold a reference to vnode_info, this function must
@@ -237,15 +229,6 @@ int local_get_node_list(const struct sd_req *req, struct sd_rsp *rsp,
 }
 
 /*
- * If we have less zones available than the desired redundancy we have to do
- * with nr_zones copies, sorry.
- */
-int get_nr_copies(struct vnode_info *vnode_info)
-{
-	return min(vnode_info->nr_zones, (int)sys->nr_copies);
-}
-
-/*
  * Indicator if a cluster operation is currently running.
  */
 static bool cluster_op_running = false;
@@ -486,12 +469,6 @@ static int cluster_sanity_check(struct join_message *jm)
 {
 	uint64_t local_ctime = get_cluster_ctime();
 	uint32_t local_epoch = get_latest_epoch();
-	uint8_t local_nr_copies;
-
-	if (get_cluster_copies(&local_nr_copies)) {
-		eprintf("failed to get nr_copies\n");
-		return CJ_RES_FAIL;
-	}
 
 	if (jm->ctime != local_ctime) {
 		eprintf("joining node ctime doesn't match: %"
@@ -507,12 +484,6 @@ static int cluster_sanity_check(struct join_message *jm)
 		return CJ_RES_FAIL;
 	}
 
-	if (jm->nr_copies != local_nr_copies) {
-		eprintf("joining node nr_copies doesn't match: %u vs %u\n",
-			jm->nr_copies, local_nr_copies);
-		return CJ_RES_FAIL;
-	}
-
 	if (jm->cluster_flags != sys->flags) {
 		eprintf("joining node cluster_flags don't match: %u vs %u\n",
 			jm->cluster_flags, sys->flags);
@@ -846,10 +817,8 @@ static void update_cluster_info(struct join_message *msg,
 	case SD_STATUS_HALT:
 		switch (sys->status) {
 		case SD_STATUS_WAIT_FOR_FORMAT:
-			sys->nr_copies = msg->nr_copies;
 			sys->flags = msg->cluster_flags;
 
-			set_cluster_copies(sys->nr_copies);
 			set_cluster_flags(sys->flags);
 			set_cluster_ctime(msg->ctime);
 			/*FALLTHROUGH*/
@@ -1008,7 +977,6 @@ enum cluster_join_result sd_check_join_cb(struct sd_node *joining, void *opaque)
 		addr_to_str(str, sizeof(str), joining->nid.addr, joining->nid.port),
 		ret, jm->cluster_status);
 
-	jm->nr_copies = sys->nr_copies;
 	jm->cluster_flags = sys->flags;
 	jm->epoch = sys->epoch;
 	jm->ctime = get_cluster_ctime();
@@ -1030,7 +998,6 @@ static int send_join_request(struct sd_node *ent)
 
 	msg = xzalloc(sizeof(*msg) + SD_MAX_NODES * sizeof(msg->nodes[0]));
 	msg->proto_ver = SD_SHEEP_PROTO_VER;
-	msg->nr_copies = sys->nr_copies;
 	msg->cluster_flags = sys->flags;
 	msg->epoch = sys->epoch;
 	msg->ctime = get_cluster_ctime();
@@ -1216,7 +1183,6 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
 	if (get_latest_epoch() > 0) {
 		sys->status = SD_STATUS_WAIT_FOR_JOIN;
 
-		get_cluster_copies(&sys->nr_copies);
 		get_cluster_flags(&sys->flags);
 
 		sys->epoch = get_latest_epoch();
diff --git a/sheep/ops.c b/sheep/ops.c
index a560e3a..6455ae4 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -153,7 +153,7 @@ static int cluster_del_vdi(struct request *req)
 {
 	const struct sd_req *hdr = &req->rq;
 	struct sd_rsp *rsp = &req->rp;
-	uint32_t vid = 0, nr_copies = sys->nr_copies;
+	uint32_t vid = 0, nr_copies = SD_DEFAULT_COPIES;
 	int ret;
 
 	ret = del_vdi(req, req->data, hdr->data_length,
@@ -172,7 +172,7 @@ static int cluster_get_vdi_info(struct request *req)
 {
 	const struct sd_req *hdr = &req->rq;
 	struct sd_rsp *rsp = &req->rp;
-	uint32_t vid = 0, nr_copies = sys->nr_copies;
+	uint32_t vid = 0, nr_copies = SD_DEFAULT_COPIES;
 	void *tag;
 	int ret;
 
@@ -242,10 +242,7 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 	if (ret != SD_RES_SUCCESS)
 		return ret;
 
-	sys->nr_copies = hdr->copies;
 	sys->flags = hdr->flags;
-	if (!sys->nr_copies)
-		sys->nr_copies = SD_DEFAULT_COPIES;
 
 	created_time = hdr->ctime;
 	set_cluster_ctime(created_time);
@@ -262,7 +259,6 @@ static int cluster_make_fs(const struct sd_req *req, struct sd_rsp *rsp,
 	if (ret)
 		return SD_RES_EIO;
 
-	set_cluster_copies(sys->nr_copies);
 	set_cluster_flags(sys->flags);
 	if (have_enough_zones())
 		sys->status = SD_STATUS_OK;
@@ -329,7 +325,7 @@ static int cluster_get_vdi_attr(struct request *req)
 {
 	const struct sd_req *hdr = &req->rq;
 	struct sd_rsp *rsp = &req->rp;
-	uint32_t vid = 0, attrid = 0, nr_copies = sys->nr_copies;
+	uint32_t vid = 0, attrid = 0, nr_copies = SD_DEFAULT_COPIES;
 	uint64_t created_time = 0;
 	int ret;
 	struct sheepdog_vdi_attr *vattr;
@@ -417,8 +413,6 @@ static int local_stat_cluster(struct request *req)
 			log->nr_nodes = epoch_log_read_remote(epoch, log->nodes,
 							      sizeof(log->nodes));
 
-		log->nr_copies = get_max_nr_copies_from(log->nodes, log->nr_nodes);
-
 		rsp->data_length += sizeof(*log);
 		/* FIXME: this hack would require sizeof(time_t) < sizeof(log->nodes[0]) */
 		log->time = *(uint64_t *)(&log->nodes[log->nr_nodes]);
@@ -476,7 +470,6 @@ static int cluster_force_recover(const struct sd_req *req, struct sd_rsp *rsp,
 {
 	struct vnode_info *old_vnode_info, *vnode_info;
 	int ret = SD_RES_SUCCESS;
-	uint8_t c;
 	uint16_t f;
 
 	/* We should manually recover the cluster when
@@ -487,14 +480,10 @@ static int cluster_force_recover(const struct sd_req *req, struct sd_rsp *rsp,
 	if (sys->status != SD_STATUS_WAIT_FOR_JOIN)
 		return SD_RES_FORCE_RECOVER;
 
-	ret = get_cluster_copies(&c);
-	if (ret)
-		return ret;
 	ret = get_cluster_flags(&f);
 	if (ret)
 		return ret;
 
-	sys->nr_copies = c;
 	sys->flags = f;
 
 	old_vnode_info = get_vnode_info_epoch(sys->epoch);
@@ -768,7 +757,6 @@ int peer_read_obj(struct request *req)
 		goto out;
 
 	rsp->data_length = hdr->data_length;
-	rsp->obj.copies = sys->nr_copies;
 out:
 	return ret;
 }
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index a679660..eb77706 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -95,7 +95,6 @@ struct cluster_info {
 
 	DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
 
-	uint8_t nr_copies;
 	int req_efd;
 
 	pthread_mutex_t wait_req_lock;
@@ -253,8 +252,6 @@ void queue_cluster_request(struct request *req);
 int update_epoch_log(uint32_t epoch, struct sd_node *nodes, size_t nr_nodes);
 int log_current_epoch(void);
 
-int set_cluster_copies(uint8_t copies);
-int get_cluster_copies(uint8_t *copies);
 int set_cluster_flags(uint16_t flags);
 int get_cluster_flags(uint16_t *flags);
 int set_cluster_store(const char *name);
diff --git a/sheep/store.c b/sheep/store.c
index 4839d13..f03d086 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -635,52 +635,6 @@ int remove_object(uint64_t oid, int copies)
 	return ret;
 }
 
-int set_cluster_copies(uint8_t copies)
-{
-	int fd, ret;
-	void *jd;
-
-	fd = open(config_path, O_DSYNC | O_WRONLY);
-	if (fd < 0)
-		return SD_RES_EIO;
-
-	jd = jrnl_begin(&copies, sizeof(copies),
-			offsetof(struct sheepdog_config, copies),
-			config_path, jrnl_path);
-	if (!jd) {
-		ret = SD_RES_EIO;
-		goto err;
-	}
-
-	ret = xpwrite(fd, &copies, sizeof(copies), offsetof(struct sheepdog_config, copies));
-	if (ret != sizeof(copies))
-		ret = SD_RES_EIO;
-	else
-		ret = SD_RES_SUCCESS;
-	jrnl_end(jd);
-err:
-	close(fd);
-	return ret;
-}
-
-int get_cluster_copies(uint8_t *copies)
-{
-	int fd, ret;
-
-	fd = open(config_path, O_RDONLY);
-	if (fd < 0)
-		return SD_RES_EIO;
-
-	ret = xpread(fd, copies, sizeof(*copies),
-		     offsetof(struct sheepdog_config, copies));
-	close(fd);
-
-	if (ret != sizeof(*copies))
-		return SD_RES_EIO;
-
-	return SD_RES_SUCCESS;
-}
-
 int set_cluster_flags(uint16_t flags)
 {
 	int fd, ret = SD_RES_EIO;
-- 
1.7.1