[sheepdog] [PATCH 3/4] fix sys->epoch race

Liu Yuan namei.unix at gmail.com
Fri May 18 13:06:19 CEST 2012


From: Liu Yuan <tailai.ly at taobao.com>

We shouldn't read sys->epoch directly in worker threads. Add an atomic helper,
sys_epoch(), for reading it.

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 sheep/farm/farm.c    |   10 ++++++----
 sheep/farm/snap.c    |    2 +-
 sheep/group.c        |    4 ++--
 sheep/object_cache.c |    6 +++---
 sheep/sheep_priv.h   |    7 +++++++
 5 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index e94c762..ae7b153 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -111,7 +111,7 @@ static int farm_write(uint64_t oid, struct siocb *iocb, int create)
 {
 	int flags = def_open_flags, fd, ret;
 
-	if (iocb->epoch < sys->epoch) {
+	if (iocb->epoch < sys_epoch()) {
 	/* We are expecting that upper layer would retry it for in-fly IO
 	 * while the cluster is in recovery to avoid data inconsistency.
 	 */
@@ -444,8 +444,9 @@ out:
 static int farm_read(uint64_t oid, struct siocb *iocb)
 {
 	int flags = def_open_flags, fd, ret = SD_RES_SUCCESS;
+	uint32_t epoch = sys_epoch();
 
-	if (iocb->epoch < sys->epoch) {
+	if (iocb->epoch < epoch) {
 		int i;
 		void *buffer;
 
@@ -459,7 +460,7 @@ static int farm_read(uint64_t oid, struct siocb *iocb)
 			 * in this case, we should try to retrieve object upwards, since.
 			 * when the object is to be removed, it will get written to the
 			 * snapshot at later epoch. */
-			for (i = iocb->epoch; i < sys->epoch; i++) {
+			for (i = iocb->epoch; i < epoch; i++) {
 				buffer = retrieve_object_from_snap(oid, i);
 				if (buffer)
 					break;
@@ -553,10 +554,11 @@ static int farm_link(uint64_t oid, struct siocb *iocb, uint32_t tgt_epoch)
 	void *buf = NULL;
 	struct siocb io = { 0 };
 	int i;
+	uint32_t epoch = sys_epoch();
 
 	dprintf("try link %"PRIx64" from snapshot with epoch %d\n", oid, tgt_epoch);
 
-	for (i = tgt_epoch; i < sys->epoch; i++) {
+	for (i = tgt_epoch; i < epoch; i++) {
 		buf = retrieve_object_from_snap(oid, i);
 		if (buf)
 			break;
diff --git a/sheep/farm/snap.c b/sheep/farm/snap.c
index 957ea13..3134c28 100644
--- a/sheep/farm/snap.c
+++ b/sheep/farm/snap.c
@@ -160,7 +160,7 @@ int snap_file_write(uint32_t epoch, unsigned char *trunksha1, unsigned char *out
 	int ret = 0;
 	struct strbuf buf = STRBUF_INIT;
 	struct sd_node nodes[SD_MAX_NODES];
-	int tgt_epoch = user ? sys->epoch : epoch;
+	int tgt_epoch = user ? sys_epoch() : epoch;
 	uint64_t epoch_size = epoch_log_read(tgt_epoch, (char *)nodes, sizeof(nodes));
 	struct sha1_file_hdr hdr = { .size = epoch_size + SHA1_LEN,
 				     .priv = tgt_epoch };
diff --git a/sheep/group.c b/sheep/group.c
index 7c5c8f5..2d92d8a 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -662,7 +662,7 @@ static void update_cluster_info(struct join_message *msg,
 	if (msg->cluster_status == SD_STATUS_OK ||
 	    msg->cluster_status == SD_STATUS_HALT) {
 		if (msg->inc_epoch) {
-			sys->epoch++;
+			uatomic_inc(&sys->epoch);
 			update_epoch_log(sys->epoch, sys->nodes, sys->nr_nodes);
 			update_epoch_store(sys->epoch);
 		}
@@ -1314,7 +1314,7 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members,
 	update_node_info(members, nr_members);
 
 	if (sys_can_recover()) {
-		sys->epoch++;
+		uatomic_inc(&sys->epoch);
 		update_epoch_store(sys->epoch);
 		update_epoch_log(sys->epoch, sys->nodes, sys->nr_nodes);
 	}
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 954476a..cf1c302 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -435,7 +435,7 @@ int object_cache_pull(struct vnode_info *vnode_info, struct object_cache *oc,
 		v = vnodes[i];
 		if (vnode_is_local(v)) {
 			struct siocb iocb = { 0 };
-			iocb.epoch = sys->epoch;
+			iocb.epoch = sys_epoch();
 			iocb.buf = buf;
 			iocb.length = data_length;
 			iocb.offset = 0;
@@ -458,7 +458,7 @@ pull_remote:
 			continue;
 
 		hdr.opcode = SD_OP_READ_OBJ;
-		hdr.epoch = sys->epoch;
+		hdr.epoch = sys_epoch();
 		hdr.data_length = rlen = data_length;
 		hdr.flags = SD_FLAG_CMD_IO_LOCAL;
 		hdr.obj.oid = oid;
@@ -546,7 +546,7 @@ static int push_cache_object(struct vnode_info *vnode_info, uint32_t vid,
 	hdr->opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
 	hdr->flags = SD_FLAG_CMD_WRITE;
 	hdr->data_length = data_length;
-	hdr->epoch = sys->epoch;
+	hdr->epoch = sys_epoch();
 
 	hdr->obj.oid = oid;
 	hdr->obj.offset = 0;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ea236f0..5ca862e 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -12,6 +12,7 @@
 #define __SHEEP_PRIV_H__
 
 #include <inttypes.h>
+#include <urcu/uatomic.h>
 
 #include "sheepdog_proto.h"
 #include "event.h"
@@ -216,6 +217,12 @@ extern mode_t def_fmode;
 extern mode_t def_dmode;
 extern struct objlist_cache obj_list_cache;
 
+/* Read sys->epoch with uatomic_read(); use this outside the main thread */
+static inline uint32_t sys_epoch(void)
+{
+	return uatomic_read(&sys->epoch);
+}
+
 int create_listen_port(int port, void *data);
 
 int init_store(const char *dir, int enable_write_cache);
-- 
1.7.8.2




More information about the sheepdog mailing list