[sheepdog] [PATCH 2/4] Revert "sheep, dog: fast deep copy for snapshot"
Hitoshi Mitake
mitake.hitoshi at lab.ntt.co.jp
Thu Feb 5 03:33:53 CET 2015
This reverts commit 86a596edb9b760463f97e00cac6a1e1674eb8b70.
---
dog/vdi.c | 149 +++++++++-----------------------------
include/internal_proto.h | 1 -
include/sheepdog_proto.h | 5 +-
sheep/ops.c | 16 -----
sheep/sheep_priv.h | 1 -
sheep/vdi.c | 184 -----------------------------------------------
6 files changed, 34 insertions(+), 322 deletions(-)
diff --git a/dog/vdi.c b/dog/vdi.c
index 4fee789..2097871 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -40,8 +40,6 @@ static struct sd_option vdi_options[] = {
" neither comparing nor repairing"},
{'z', "block_size_shift", true, "specify the bit shift num for"
" data object size"},
- {'D', "fast-deep-copy", false, "fast deep copy for"
- "snapshot with --no-share"},
{ 0, NULL, false, NULL },
};
@@ -63,7 +61,6 @@ static struct vdi_cmd_data {
uint64_t oid;
bool no_share;
bool exist;
- bool fast_deep_copy;
} vdi_cmd_data = { ~0, };
struct get_vdi_info {
@@ -610,36 +607,6 @@ fail:
return NULL;
}
-struct req_fast_deep_copy {
- struct work work;
-
- struct sd_node *node;
- uint32_t src, dst;
-};
-
-static void req_fast_deep_copy_work(struct work *work)
-{
- struct req_fast_deep_copy *w =
- container_of(work, struct req_fast_deep_copy, work);
- struct sd_req hdr;
- int ret;
-
- sd_init_req(&hdr, SD_OP_FAST_DEEP_COPY);
- hdr.fast_deep_copy.src_vid = w->src;
- hdr.fast_deep_copy.dst_vid = w->dst;
- ret = dog_exec_req(&w->node->nid, &hdr, NULL);
- if (ret < 0)
- sd_err("deep copy failed");
- /* TODO: error handling */
-}
-
-static void req_fast_deep_copy_done(struct work *work)
-{
- struct req_fast_deep_copy *w =
- container_of(work, struct req_fast_deep_copy, work);
- free(w);
-}
-
static int vdi_snapshot(int argc, char **argv)
{
const char *vdiname = argv[optind++];
@@ -764,7 +731,7 @@ static int vdi_snapshot(int argc, char **argv)
new_inode = xmalloc(sizeof(*inode));
ret = read_vdi_obj(vdiname, 0, "", &new_vid, new_inode,
- sizeof(*inode));
+ SD_INODE_HEADER_SIZE);
if (ret != EXIT_SUCCESS)
goto out;
@@ -774,95 +741,48 @@ static int vdi_snapshot(int argc, char **argv)
* So we don't have to worry about that clients see working VDI with
* inconsistent data_vdi_id.
*/
- if (vdi_cmd_data.fast_deep_copy) {
- struct work_queue *q;
- struct sd_node *n;
-
- q = create_work_queue("deep copy", WQ_DYNAMIC);
-
- rb_for_each_entry(n, &sd_nroot, rb) {
- struct req_fast_deep_copy *w;
-
- w = xzalloc(sizeof(*w));
-
- w->src = vid;
- w->dst = new_vid;
- w->node = n;
-
- w->work.fn = req_fast_deep_copy_work;
- w->work.done = req_fast_deep_copy_done;
+ object_size = (UINT32_C(1) << inode->block_size_shift);
+ data_obj_buf = xzalloc(object_size);
+ max_idx = count_data_objs(inode);
- queue_work(q, &w->work);
- }
+ for (idx = 0; idx < max_idx; idx++) {
+ uint32_t vdi_id;
+ uint64_t oid;
- work_queue_wait(q);
+ vdi_show_progress(idx * object_size, inode->vdi_size);
- /* fast deep copy completed */
+ vdi_id = sd_inode_get_vid(inode, idx);
+ if (!vdi_id)
+ continue;
- for (int new_idx = 0; new_idx < SD_INODE_DATA_INDEX;
- new_idx++) {
- if (inode->data_vdi_id[new_idx])
- new_inode->data_vdi_id[new_idx] = new_vid;
+ oid = vid_to_data_oid(vdi_id, idx);
+ ret = dog_read_object(oid, data_obj_buf, object_size, 0,
+ true);
+ if (ret) {
+ ret = EXIT_FAILURE;
+ goto out;
}
- ret = dog_write_object(vid_to_vdi_oid(new_vid), 0,
- new_inode->data_vdi_id,
- SD_INODE_DATA_INDEX *
- sizeof(new_inode->data_vdi_id[0]),
- offsetof(struct sd_inode,
- data_vdi_id[0]),
- 0, new_inode->nr_copies,
- new_inode->copy_policy,
- false, true);
- if (ret < 0) {
- sd_err("updating inode failed");
+ oid = vid_to_data_oid(new_vid, idx);
+ ret = dog_write_object(oid, 0, data_obj_buf, object_size, 0, 0,
+ inode->nr_copies,
+ inode->copy_policy, true, true);
+ if (ret != SD_RES_SUCCESS) {
+ ret = EXIT_FAILURE;
goto out;
}
- } else {
- object_size = (UINT32_C(1) << inode->block_size_shift);
- data_obj_buf = xzalloc(object_size);
- max_idx = count_data_objs(inode);
-
- for (idx = 0; idx < max_idx; idx++) {
- uint32_t vdi_id;
- uint64_t oid;
-
- vdi_show_progress(idx * object_size, inode->vdi_size);
-
- vdi_id = sd_inode_get_vid(inode, idx);
- if (!vdi_id)
- continue;
-
- oid = vid_to_data_oid(vdi_id, idx);
- ret = dog_read_object(oid, data_obj_buf, object_size, 0,
- true);
- if (ret) {
- ret = EXIT_FAILURE;
- goto out;
- }
- oid = vid_to_data_oid(new_vid, idx);
- ret = dog_write_object(oid, 0, data_obj_buf,
- object_size, 0, 0,
- inode->nr_copies,
- inode->copy_policy, true, true);
- if (ret != SD_RES_SUCCESS) {
- ret = EXIT_FAILURE;
- goto out;
- }
-
- sd_inode_set_vid(new_inode, idx, new_vid);
- ret = sd_inode_write_vid(new_inode, idx, new_vid, new_vid,
- 0, false, true);
- if (ret) {
- ret = EXIT_FAILURE;
- goto out;
- }
+ sd_inode_set_vid(new_inode, idx, new_vid);
+ ret = sd_inode_write_vid(new_inode, idx, new_vid, new_vid, 0,
+ false, true);
+ if (ret) {
+ ret = EXIT_FAILURE;
+ goto out;
}
-
- vdi_show_progress(idx * object_size, inode->vdi_size);
}
+ vdi_show_progress(idx * object_size, inode->vdi_size);
+
print_result:
if (verbose) {
if (raw_output)
@@ -3207,8 +3127,8 @@ static struct subcommand vdi_cmd[] = {
{"create", "<vdiname> <size>", "PycaphrvzT", "create an image",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_create, vdi_options},
- {"snapshot", "<vdiname>", "saphrvTnD", "create a snapshot",
- NULL, CMD_NEED_ARG|CMD_NEED_NODELIST,
+ {"snapshot", "<vdiname>", "saphrvTn", "create a snapshot",
+ NULL, CMD_NEED_ARG,
vdi_snapshot, vdi_options},
{"clone", "<src vdi> <dst vdi>", "sPnaphrvT", "clone an image",
NULL, CMD_NEED_ARG,
@@ -3372,9 +3292,6 @@ static int vdi_parser(int ch, const char *opt)
}
vdi_cmd_data.block_size_shift = block_size_shift;
break;
- case 'D':
- vdi_cmd_data.fast_deep_copy = true;
- break;
}
return 0;
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 6b24b5c..225cc28 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -115,7 +115,6 @@
#define SD_OP_SET_RECOVERY 0xCB
#define SD_OP_SET_VNODES 0xCC
#define SD_OP_GET_VNODES 0xCD
-#define SD_OP_FAST_DEEP_COPY 0xCE
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index bf3e9df..16b6b35 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -210,10 +210,7 @@ struct sd_req {
uint32_t vid;
uint32_t validate;
} inode_coherence;
- struct {
- uint32_t src_vid;
- uint32_t dst_vid;
- } fast_deep_copy;
+
uint32_t __pad[8];
};
diff --git a/sheep/ops.c b/sheep/ops.c
index bc2848b..9af72d1 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -1493,16 +1493,6 @@ static int cluster_inode_coherence(const struct sd_req *req,
!!req->inode_coherence.validate, &sender->nid);
}
-static int local_fast_deep_copy(struct request *req)
-{
- sd_debug("fast deep copy, source VID: %"PRIx32", destination VID: %"
- PRIx32, req->rq.fast_deep_copy.src_vid,
- req->rq.fast_deep_copy.dst_vid);
-
- return fast_deep_copy(req->vinfo, req->rq.fast_deep_copy.src_vid,
- req->rq.fast_deep_copy.dst_vid);
-}
-
static int local_get_recovery(struct request *req)
{
struct recovery_throttling rthrottling;
@@ -1971,12 +1961,6 @@ static struct sd_op_template sd_ops[] = {
.process_main = local_set_vnodes,
},
- [SD_OP_FAST_DEEP_COPY] = {
- .name = "FAST_DEEP_COPY",
- .type = SD_OP_TYPE_LOCAL,
- .process_work = local_fast_deep_copy,
- },
-
/* gateway I/O operations */
[SD_OP_CREATE_AND_WRITE_OBJ] = {
.name = "CREATE_AND_WRITE_OBJ",
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 3399a36..a867874 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -366,7 +366,6 @@ int inode_coherence_update(uint32_t vid, bool validate,
const struct node_id *sender);
void remove_node_from_participants(const struct node_id *left);
void run_vid_gc(uint32_t vid);
-int fast_deep_copy(struct vnode_info *vinfo, uint32_t src, uint32_t dst);
extern int ec_max_data_strip;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 2889df6..bb7fa6a 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -2178,187 +2178,3 @@ out:
sd_rw_unlock(&vdi_state_lock);
}
-
-struct fast_deep_copy_work {
- struct work work;
-
- uint32_t src, dst;
- int nr_copies, block_size_shift;
- struct vnode_info *vinfo;
- int epoch;
-
- refcnt_t refcnt;
- eventfd_t finish_fd;
-};
-
-struct copy_single_object_work {
- struct work work;
-
- uint64_t src, new;
- int block_size_shift;
- int epoch;
-
- struct fast_deep_copy_work *dcw;
-};
-
-static void copy_single_object_worker(struct work *work)
-{
- struct copy_single_object_work *w =
- container_of(work, struct copy_single_object_work, work);
- char *obj;
- int obj_size, ret;
- struct siocb iocb = { 0 };
-
- sd_debug("copying from %"PRIx64 " to %"PRIx64, w->src, w->new);
-
- obj_size = 1 << w->block_size_shift;
- obj = xzalloc(obj_size);
-
- ret = sd_read_object(w->src, obj, obj_size, 0);
- if (ret != SD_RES_SUCCESS) {
- sd_err("failed to read source object: %"PRIx64, w->src);
- goto out;
- }
-
- iocb.epoch = w->epoch;
- iocb.length = obj_size;
- iocb.offset = 0;
- iocb.buf = obj;
-
- sd_debug("writing new obj: %"PRIx64, w->new);
- sd_store->create_and_write(w->new, &iocb);
- if (ret != SD_RES_SUCCESS)
- sd_err("failed to write object: %"PRIx64, w->new);
-
-out:
- refcount_dec(&w->dcw->refcnt);
- if (refcount_read(&w->dcw->refcnt) == 0)
- eventfd_xwrite(w->dcw->finish_fd, 1);
-
- free(obj);
-}
-
-static void copy_single_object_done(struct work *work)
-{
- struct copy_single_object_work *w =
- container_of(work, struct copy_single_object_work, work);
- free(w);
-}
-
-static void fast_deep_copy_worker(struct work *work)
-{
- struct fast_deep_copy_work *w =
- container_of(work, struct fast_deep_copy_work, work);
- uint32_t *src_data_vdi_id;
- int ret;
-
- src_data_vdi_id = xcalloc(SD_INODE_DATA_INDEX, sizeof(uint32_t));
-
- ret = sd_read_object(vid_to_vdi_oid(w->src), (char *)src_data_vdi_id,
- SD_INODE_DATA_INDEX * sizeof(uint32_t),
- offsetof(struct sd_inode, data_vdi_id[0]));
- if (ret != SD_RES_SUCCESS) {
- sd_err("failed to read data_vdi_id of source VDI: %"PRIx32,
- w->src);
- goto out;
- }
-
- for (int idx = 0; idx < SD_INODE_DATA_INDEX; idx++) {
- /*
- * FIXME: need to calculate refcnt before actual queuing work.
- * cleaning is needed...
- */
- uint64_t new_oid;
- const struct sd_vnode *v;
- const struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
-
- if (!src_data_vdi_id[idx])
- continue;
-
- new_oid = vid_to_data_oid(w->dst, idx);
- oid_to_vnodes(new_oid, &w->vinfo->vroot, w->nr_copies,
- obj_vnodes);
- for (int i = 0; i < w->nr_copies; i++) {
- v = obj_vnodes[i];
- if (vnode_is_local(v))
- refcount_inc(&w->refcnt);
- }
- }
-
- sd_debug("a number of objects to copy: %d", refcount_read(&w->refcnt));
-
- for (int idx = 0; idx < SD_INODE_DATA_INDEX; idx++) {
- struct copy_single_object_work *single;
- uint64_t new_oid;
- const struct sd_vnode *v;
- const struct sd_vnode *obj_vnodes[SD_MAX_COPIES];
-
- if (!src_data_vdi_id[idx])
- continue;
-
- new_oid = vid_to_data_oid(w->dst, idx);
- oid_to_vnodes(new_oid, &w->vinfo->vroot, w->nr_copies,
- obj_vnodes);
- for (int i = 0; i < w->nr_copies; i++) {
- v = obj_vnodes[i];
- if (vnode_is_local(v))
- goto do_copy;
-
- }
- continue;
-
-do_copy:
- single = xzalloc(sizeof(*single));
-
- single->src = vid_to_data_oid(src_data_vdi_id[idx], idx);
- single->new = new_oid;
- single->block_size_shift = w->block_size_shift;
- single->epoch = w->epoch;
- single->dcw = w;
-
- single->work.fn = copy_single_object_worker;
- single->work.done = copy_single_object_done;
-
- queue_work(sys->io_wqueue, &single->work);
- }
-
-out:
- free(src_data_vdi_id);
-}
-
-static void fast_deep_copy_done(struct work *work)
-{
- struct fast_deep_copy_work *w =
- container_of(work, struct fast_deep_copy_work, work);
-
- sd_debug("fast deep copy finished (%"PRIx32" -> %"PRIx32")",
- w->src, w->dst);
- put_vnode_info(w->vinfo);
- free(w);
-}
-
-worker_fn int fast_deep_copy(struct vnode_info *vinfo,
- uint32_t src, uint32_t dst)
-{
- struct fast_deep_copy_work *w;
-
- w = xzalloc(sizeof(*w));
-
- w->src = src;
- w->dst = dst;
- w->vinfo = grab_vnode_info(vinfo);
- w->nr_copies = get_vdi_copy_number(src);
- w->block_size_shift = get_vdi_block_size_shift(src);
- w->epoch = get_latest_epoch();
- w->finish_fd = eventfd(0, EFD_SEMAPHORE);
-
- w->work.fn = fast_deep_copy_worker;
- w->work.done = fast_deep_copy_done;
-
- queue_work(sys->io_wqueue, &w->work);
-
- eventfd_xread(w->finish_fd);
- close(w->finish_fd);
-
- return SD_RES_SUCCESS;
-}
--
1.9.1
More information about the sheepdog mailing list