[Sheepdog] [PATCH v3 5/8] sheep: add flush_vdi operation

Liu Yuan namei.unix at gmail.com
Thu Mar 15 12:49:19 CET 2012


From: Liu Yuan <tailai.ly at taobao.com>

This is supposed to be initiated by the guest OS, but our collie
friend might also like it.

The flush operation works on a per-VDI basis; that is, when one guest
flushes its own dirty data, other guests are not affected.

- use forward_write_obj_req() to flush dirty objects

Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
 include/sheepdog_proto.h |    1 +
 sheep/object_cache.c     |   49 +++++++++++++++++++++++++++++++++++++++++++--
 sheep/ops.c              |    5 ++++
 sheep/sheep_priv.h       |    3 ++
 sheep/store.c            |   27 +++++++++++++++++++++++-
 5 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 2d0d5ec..8cd1cac 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -29,6 +29,7 @@
 #define SD_OP_RELEASE_VDI    0x13
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
+#define SD_OP_FLUSH_VDI      0x16
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index b9d7913..ee1c96b 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -286,7 +286,6 @@ static int create_cache_object(struct object_cache *oc, uint32_t idx, void *buff
 out_close:
 	close(fd);
 out:
-	strbuf_release(&buf);
 	return ret;
 }
 
@@ -371,10 +370,54 @@ static uint64_t idx_to_oid(uint32_t vid, uint32_t idx)
 		return vid_to_data_oid(vid, idx);
 }
 
-static int push_cache_object(int fd, uint64_t oid)
+static int push_cache_object(uint32_t vid, uint32_t idx)
 {
+	struct request fake_req;
+	struct sd_obj_req *hdr = (struct sd_obj_req *)&fake_req.rq;
+	void *buf;
+	unsigned data_length;
+	int ret = SD_RES_NO_MEM;
+	uint64_t oid = idx_to_oid(vid, idx);
+
 	dprintf("%"PRIx64"\n", oid);
-	return 0;
+
+	memset(&fake_req, 0, sizeof(fake_req));
+	if (is_vdi_obj(oid))
+		data_length = sizeof(struct sheepdog_inode);
+	else
+		data_length = SD_DATA_OBJ_SIZE;
+
+	buf = malloc(data_length);
+	if (buf == NULL) {
+		eprintf("failed to allocate memory\n");
+		goto out;
+	}
+
+	ret = read_cache_object(vid, idx, buf, data_length, 0);
+	if (ret != SD_RES_SUCCESS)
+		goto out;
+
+	hdr->offset = 0;
+	hdr->data_length = data_length;
+	hdr->opcode = SD_OP_WRITE_OBJ;
+	hdr->flags = SD_FLAG_CMD_WRITE;
+	hdr->oid = oid;
+	hdr->copies = sys->nr_sobjs;
+	hdr->epoch = sys->epoch;
+	fake_req.data = buf;
+	fake_req.op = get_sd_op(SD_OP_WRITE_OBJ);
+	fake_req.entry = sys->vnodes;
+	fake_req.nr_vnodes = sys->nr_vnodes;
+	fake_req.nr_zones = get_zones_nr_from(sys->nodes, sys->nr_vnodes);
+
+	ret = forward_write_obj_req(&fake_req);
+	if (ret != SD_RES_SUCCESS) {
+		eprintf("failed to push object %x\n", ret);
+		goto out;
+	}
+out:
+	free(buf);
+	return ret;
 }
 
 /* Push back all the dirty objects to sheep cluster storage */
diff --git a/sheep/ops.c b/sheep/ops.c
index 4a672be..6b889fd 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -588,6 +588,11 @@ static struct sd_op_template sd_ops[] = {
 		.type = SD_OP_TYPE_IO,
 		.process_work = store_remove_obj,
 	},
+
+	[SD_OP_FLUSH_VDI] = {
+		.type = SD_OP_TYPE_IO,
+		.process_work = store_flush_vdi,
+	},
 };
 
 struct sd_op_template *get_sd_op(uint8_t opcode)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index af2f80b..6e588c3 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -246,6 +246,7 @@ int write_object_local(uint64_t oid, char *data, unsigned int datalen,
 		       uint32_t epoch, int create);
 int read_object_local(uint64_t oid, char *data, unsigned int datalen,
 		      uint64_t offset, int copies, uint32_t epoch);
+int forward_write_obj_req(struct request *req);
 
 int read_epoch(uint32_t *epoch, uint64_t *ctime,
 	       struct sd_node *entries, int *nr_entries);
@@ -265,6 +266,7 @@ int store_create_and_write_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_write_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_read_obj(const struct sd_req *, struct sd_rsp *, void *);
 int store_remove_obj(const struct sd_req *, struct sd_rsp *, void *);
+int store_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *data);
 
 int store_file_write(void *buffer, size_t len);
 void *store_file_read(void);
@@ -285,6 +287,7 @@ int get_obj_list(const struct sd_list_req *hdr, struct sd_list_rsp *rsp, void *d
 int start_recovery(uint32_t epoch);
 void resume_recovery_work(void);
 int is_recoverying_oid(uint64_t oid);
+int node_in_recovery(void);
 
 int write_object(struct sd_vnode *e,
 		 int vnodes, int zones, uint32_t node_version,
diff --git a/sheep/store.c b/sheep/store.c
index 43d7b0d..566409f 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -288,7 +288,7 @@ out:
 	return ret;
 }
 
-static int forward_write_obj_req(struct request *req)
+int forward_write_obj_req(struct request *req)
 {
 	int i, n, nr, fd, ret, pollret;
 	unsigned wlen;
@@ -585,6 +585,24 @@ out:
 	return ret;
 }
 
+int store_flush_vdi(const struct sd_req *req, struct sd_rsp *rsp, void *data)
+{
+	struct sd_obj_req *hdr = (struct sd_obj_req *)req;
+	uint64_t oid = hdr->oid;
+	uint32_t vid = oid_to_vid(oid);
+	struct object_cache *cache = find_object_cache(vid);
+
+	/*
+	 * We don't propagate the error to Guests because
+	 * 1) Guests can't handle it for now
+	 * 2) Next flush request will flush again
+	 */
+	if (cache)
+		object_cache_push(cache);
+
+	return SD_RES_SUCCESS;
+}
+
 static int do_write_obj(struct siocb *iocb, struct sd_obj_req *req, uint32_t epoch, void *data)
 {
 	struct sd_obj_req *hdr = (struct sd_obj_req *)req;
@@ -786,7 +804,7 @@ void do_io_request(struct work *work)
 	if (hdr->flags & SD_FLAG_CMD_RECOVERY)
 		epoch = hdr->tgt_epoch;
 
-	if (hdr->flags & SD_FLAG_CMD_IO_LOCAL) {
+	if (hdr->flags & SD_FLAG_CMD_IO_LOCAL || opcode == SD_OP_FLUSH_VDI) {
 		ret = do_local_io(req, epoch);
 	} else {
 		/* fix object consistency when we read the object for the first time */
@@ -1486,6 +1504,11 @@ void resume_recovery_work(void)
 	queue_work(sys->recovery_wqueue, &rw->work);
 }
 
+int node_in_recovery(void)
+{
+	return !!recovering_work;
+}
+
 int is_recoverying_oid(uint64_t oid)
 {
 	uint64_t hval = fnv_64a_buf(&oid, sizeof(uint64_t), FNV1A_64_INIT);
-- 
1.7.8.2




More information about the sheepdog mailing list