[sheepdog] [PATCH v10 09/19] sheep, dog: prevent COW during snapshot creation
Hitoshi Mitake
mitake.hitoshi at gmail.com
Mon Jun 2 17:09:03 CEST 2014
The current generational reference counting scheme for garbage collection
of snapshot objects requires mutual exclusion between COW operations and
snapshot creation, because both COW operations and snapshot creation
modify the generational reference status. This patch adds operations
for temporarily preventing and re-allowing COW.
Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
Tested-by: Valerio Pachera <sirio81 at gmail.com>
Cc: Alessandro Bolgia <alessandro at extensys.it>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
dog/vdi.c | 15 +++++++++++++++
include/internal_proto.h | 2 ++
sheep/gateway.c | 14 --------------
sheep/group.c | 3 +++
sheep/ops.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++
sheep/request.c | 37 ++++++++++++++++++++++++++++++++---
sheep/sheep_priv.h | 23 ++++++++++++++++++++++
7 files changed, 127 insertions(+), 17 deletions(-)
diff --git a/dog/vdi.c b/dog/vdi.c
index 866cb36..7e54fd9 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -497,6 +497,7 @@ static int vdi_snapshot(int argc, char **argv)
int ret;
char buf[SD_INODE_HEADER_SIZE];
struct sd_inode *inode = (struct sd_inode *)buf;
+ struct sd_req hdr;
if (vdi_cmd_data.snapshot_id != 0) {
sd_err("Please specify a non-integer value for "
@@ -513,6 +514,13 @@ static int vdi_snapshot(int argc, char **argv)
return EXIT_FAILURE;
}
+ sd_init_req(&hdr, SD_OP_PREVENT_COW);
+ ret = dog_exec_req(&sd_nid, &hdr, NULL);
+ if (ret < 0) {
+ sd_err("preventing COW failed");
+ return EXIT_FAILURE;
+ }
+
ret = dog_write_object(vid_to_vdi_oid(vid), 0,
vdi_cmd_data.snapshot_tag,
SD_MAX_VDI_TAG_LEN,
@@ -534,6 +542,13 @@ static int vdi_snapshot(int argc, char **argv)
" VDI ID of newly created snapshot: %x\n", new_vid, vid);
}
+ sd_init_req(&hdr, SD_OP_ALLOW_COW);
+ ret = dog_exec_req(&sd_nid, &hdr, NULL);
+ if (ret < 0) {
+ sd_err("allowing COW failed");
+ return EXIT_FAILURE;
+ }
+
return ret;
}
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 4afc87e..23967ba 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -104,6 +104,8 @@
#define SD_OP_ALTER_VDI_COPY 0xC0
#define SD_OP_DECREF_OBJ 0xC1
#define SD_OP_DECREF_PEER 0xC2
+#define SD_OP_PREVENT_COW 0xC3
+#define SD_OP_ALLOW_COW 0xC4
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/gateway.c b/sheep/gateway.c
index c787ec1..8868bce 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -613,20 +613,6 @@ static int update_obj_refcnt(const struct sd_req *hdr, uint32_t *vids,
false);
}
-/*
- * return true if the request updates a data_vdi_id field of a vdi object
- *
- * XXX: we assume that VMs don't update the inode header and the data_vdi_id
- * field at the same time.
- */
-static bool is_data_vid_update(const struct sd_req *hdr)
-{
- return is_vdi_obj(hdr->obj.oid) &&
- data_vid_offset(0) <= hdr->obj.offset &&
- hdr->obj.offset + hdr->data_length <=
- data_vid_offset(SD_INODE_DATA_INDEX);
-}
-
int gateway_read_obj(struct request *req)
{
uint64_t oid = req->rq.obj.oid;
diff --git a/sheep/group.c b/sheep/group.c
index 360771d..c5b322c 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -1098,6 +1098,9 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
if (ret != 0)
return -1;
+ INIT_LIST_HEAD(&sys->prevented_cow_request_queue);
+ INIT_LIST_HEAD(&sys->pending_prevent_cow_request_queue);
+
return 0;
}
diff --git a/sheep/ops.c b/sheep/ops.c
index f47aa46..e41c8bd 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -1184,6 +1184,44 @@ out:
return ret;
}
+static int local_prevent_cow(const struct sd_req *req, struct sd_rsp *rsp,
+ void *data)
+{
+ /* FIXME: change type of process_main() */
+ struct request *rq = container_of(req, struct request, rq);
+
+ sd_debug("preventing COW request, ongoing COW requests: %d",
+ sys->nr_ongoing_cow_request);
+
+ sys->prevent_cow = true;
+
+ if (sys->nr_ongoing_cow_request) {
+ list_add_tail(&rq->pending_prevent_cow_request_list,
+ &sys->pending_prevent_cow_request_queue);
+ get_request(rq);
+ }
+
+ return SD_RES_SUCCESS;
+}
+
+static int local_allow_cow(const struct sd_req *req, struct sd_rsp *rsp,
+ void *data)
+{
+ struct request *rq;
+
+ sd_debug("allowing COW request");
+
+ sys->prevent_cow = false;
+
+ list_for_each_entry(rq, &sys->prevented_cow_request_queue,
+ prevented_cow_request_list) {
+ list_del(&rq->prevented_cow_request_list);
+ requeue_request(rq);
+ }
+
+ return SD_RES_SUCCESS;
+}
+
static struct sd_op_template sd_ops[] = {
/* cluster operations */
@@ -1523,6 +1561,18 @@ static struct sd_op_template sd_ops[] = {
},
#endif
+ [SD_OP_PREVENT_COW] = {
+ .name = "PREVENT_COW",
+ .type = SD_OP_TYPE_LOCAL,
+ .process_main = local_prevent_cow,
+ },
+
+ [SD_OP_ALLOW_COW] = {
+ .name = "ALLOW_COW",
+ .type = SD_OP_TYPE_LOCAL,
+ .process_main = local_allow_cow,
+ },
+
/* gateway I/O operations */
[SD_OP_CREATE_AND_WRITE_OBJ] = {
.name = "CREATE_AND_WRITE_OBJ",
diff --git a/sheep/request.c b/sheep/request.c
index 6972f10..f5150d5 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -13,8 +13,6 @@
#include "sheep_priv.h"
-static void requeue_request(struct request *req);
-
static void del_requeue_request(struct request *req)
{
list_del(&req->request_list);
@@ -92,6 +90,24 @@ static void gateway_op_done(struct work *work)
struct request *req = container_of(work, struct request, work);
struct sd_req *hdr = &req->rq;
+ if (hdr->opcode == SD_OP_WRITE_OBJ && is_data_vid_update(hdr)) {
+ struct request *rq;
+
+ sys->nr_ongoing_cow_request--;
+ assert(0 <= sys->nr_ongoing_cow_request);
+ sd_debug("a number of ongoing cow request: %d",
+ sys->nr_ongoing_cow_request);
+
+ if (!sys->nr_ongoing_cow_request) {
+ list_for_each_entry(rq,
+ &sys->pending_prevent_cow_request_queue,
+ pending_prevent_cow_request_list) {
+ list_del(&rq->pending_prevent_cow_request_list);
+ put_request(rq);
+ }
+ }
+ }
+
switch (req->rp.result) {
case SD_RES_OLD_NODE_VER:
if (req->rp.epoch > sys->cinfo.epoch) {
@@ -336,6 +352,20 @@ queue_work:
goto end_request;
}
+ if (req->rq.opcode == SD_OP_WRITE_OBJ && is_data_vid_update(&req->rq)) {
+ if (sys->prevent_cow) {
+ sd_debug("preventing COW");
+ list_add_tail(&req->prevented_cow_request_list,
+ &sys->prevented_cow_request_queue);
+ return;
+ } else {
+ assert(0 <= sys->nr_ongoing_cow_request);
+ sys->nr_ongoing_cow_request++;
+ sd_debug("a number of ongoing cow request: %d",
+ sys->nr_ongoing_cow_request);
+ }
+ }
+
req->work.fn = do_process_work;
req->work.done = gateway_op_done;
queue_work(sys->gateway_wqueue, &req->work);
@@ -497,7 +527,7 @@ done:
put_request(req);
}
-static void requeue_request(struct request *req)
+void requeue_request(struct request *req)
{
if (req->vinfo) {
put_vnode_info(req->vinfo);
@@ -836,6 +866,7 @@ static void tx_main(struct work *work)
ci->conn.ipstr,
ci->conn.port);
}
+
free_request(ci->tx_req);
ci->tx_req = NULL;
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index da0acb2..a876fe5 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -114,6 +114,9 @@ struct request {
struct work work;
enum REQUST_STATUS status;
bool stat; /* true if this request is during stat */
+
+ struct list_node prevented_cow_request_list;
+ struct list_node pending_prevent_cow_request_list;
};
struct system_info {
@@ -162,6 +165,11 @@ struct system_info {
/* upgrade data layout before starting service if necessary*/
bool upgrade;
struct sd_stat stat;
+
+ bool prevent_cow;
+ int nr_ongoing_cow_request;
+ struct list_head prevented_cow_request_queue;
+ struct list_head pending_prevent_cow_request_queue;
};
struct disk {
@@ -426,6 +434,7 @@ void objlist_cache_remove(uint64_t oid);
void put_request(struct request *req);
void get_request(struct request *req);
+void requeue_request(struct request *req);
int sheep_bnode_writer(uint64_t oid, void *mem, unsigned int len,
uint64_t offset, uint32_t flags, int copies,
@@ -483,6 +492,20 @@ int gateway_decref_object(struct request *req);
bool is_erasure_oid(uint64_t oid);
uint8_t local_ec_index(struct vnode_info *vinfo, uint64_t oid);
+/*
+ * return true if the request updates a data_vdi_id field of a vdi object
+ *
+ * XXX: we assume that VMs don't update the inode header and the data_vdi_id
+ * field at the same time.
+ */
+static inline bool is_data_vid_update(const struct sd_req *hdr)
+{
+ return is_vdi_obj(hdr->obj.oid) &&
+ data_vid_offset(0) <= hdr->obj.offset &&
+ hdr->obj.offset + hdr->data_length <=
+ data_vid_offset(SD_INODE_DATA_INDEX);
+}
+
/* object_cache */
void object_cache_format(void);
--
1.9.1
More information about the sheepdog
mailing list