[sheepdog] [PATCH v2 11/11] make dog copy_policy aware
Liu Yuan
namei.unix at gmail.com
Thu Sep 26 09:25:48 CEST 2013
- add copy_policy to sd_write_object()
- farm is copy_policy aware too
- we can pass copy_policy in struct sd_req, which means that even if the vdi
isn't registered in the sheep, we can still give sheep enough information to
tell whether it is an erasure object or not. This is required by the cluster
snapshot operation.
With this patch, we can pass functional/030
Signed-off-by: Liu Yuan <namei.unix at gmail.com>
---
dog/cluster.c | 10 +++++-----
dog/common.c | 4 +++-
dog/dog.h | 4 ++--
dog/farm/farm.c | 7 +++++--
dog/farm/farm.h | 10 ++++++----
dog/farm/object_tree.c | 15 ++++++++++-----
dog/vdi.c | 45 ++++++++++++++++++++++++++++-----------------
sheep/gateway.c | 20 +++++++++++++-------
sheep/plain_store.c | 2 +-
sheep/sheep_priv.h | 2 +-
sheep/vdi.c | 3 ++-
11 files changed, 76 insertions(+), 46 deletions(-)
diff --git a/dog/cluster.c b/dog/cluster.c
index 256734d..485133b 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -242,8 +242,8 @@ out:
}
static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
- uint32_t snapid, uint32_t flags,
- const struct sd_inode *i, void *data)
+ uint32_t snapid, uint32_t flags,
+ const struct sd_inode *i, void *data)
{
uint64_t vdi_oid = vid_to_vdi_oid(vid), vmstate_oid;
int nr_objs, nr_vmstate_object;
@@ -253,7 +253,7 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
return;
/* fill vdi object id */
- object_tree_insert(vdi_oid, i->nr_copies);
+ object_tree_insert(vdi_oid, i->nr_copies, 0);
/* fill data object id */
nr_objs = count_data_objs(i);
@@ -261,7 +261,7 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
if (i->data_vdi_id[idx]) {
uint64_t oid = vid_to_data_oid(i->data_vdi_id[idx],
idx);
- object_tree_insert(oid, i->nr_copies);
+ object_tree_insert(oid, i->nr_copies, i->copy_policy);
}
}
@@ -269,7 +269,7 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
nr_vmstate_object = DIV_ROUND_UP(i->vm_state_size, SD_DATA_OBJ_SIZE);
for (int idx = 0; idx < nr_vmstate_object; idx++) {
vmstate_oid = vid_to_vmstate_oid(vid, idx);
- object_tree_insert(vmstate_oid, i->nr_copies);
+ object_tree_insert(vmstate_oid, i->nr_copies, i->copy_policy);
}
}
diff --git a/dog/common.c b/dog/common.c
index c79f20e..0685be6 100644
--- a/dog/common.c
+++ b/dog/common.c
@@ -86,7 +86,8 @@ int sd_read_object(uint64_t oid, void *data, unsigned int datalen,
int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data,
unsigned int datalen, uint64_t offset, uint32_t flags,
- int copies, bool create, bool direct)
+ uint8_t copies, uint8_t copy_policy, bool create,
+ bool direct)
{
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
@@ -105,6 +106,7 @@ int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data,
hdr.flags |= SD_FLAG_CMD_DIRECT;
hdr.obj.copies = copies;
+ hdr.obj.copy_policy = copy_policy;
hdr.obj.oid = oid;
hdr.obj.cow_oid = cow_oid;
hdr.obj.offset = offset;
diff --git a/dog/dog.h b/dog/dog.h
index aac0bba..e8b7f0f 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -70,7 +70,7 @@ int sd_read_object(uint64_t oid, void *data, unsigned int datalen,
uint64_t offset, bool direct);
int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data,
unsigned int datalen, uint64_t offset, uint32_t flags,
- int copies, bool create, bool direct);
+ uint8_t copies, uint8_t, bool create, bool direct);
int dog_exec_req(const struct node_id *, struct sd_req *hdr, void *data);
int send_light_req(const struct node_id *, struct sd_req *hdr);
int do_generic_subcommand(struct subcommand *sub, int argc, char **argv);
@@ -79,7 +79,7 @@ void confirm(const char *message);
void work_queue_wait(struct work_queue *q);
int do_vdi_create(const char *vdiname, int64_t vdi_size,
uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
- int nr_copies, uint8_t copy_policy);
+ uint8_t nr_copies, uint8_t copy_policy);
int do_vdi_check(const struct sd_inode *inode);
void show_progress(uint64_t done, uint64_t total, bool raw);
diff --git a/dog/farm/farm.c b/dog/farm/farm.c
index 0e3a8eb..bf01eff 100644
--- a/dog/farm/farm.c
+++ b/dog/farm/farm.c
@@ -279,13 +279,15 @@ out:
free(sw);
}
-static int queue_save_snapshot_work(uint64_t oid, int nr_copies, void *data)
+static int queue_save_snapshot_work(uint64_t oid, uint32_t nr_copies,
+ uint8_t copy_policy, void *data)
{
struct snapshot_work *sw = xzalloc(sizeof(struct snapshot_work));
struct strbuf *trunk_buf = data;
sw->entry.oid = oid;
sw->entry.nr_copies = nr_copies;
+ sw->entry.copy_policy = copy_policy;
sw->trunk_buf = trunk_buf;
sw->work.fn = do_save_object;
sw->work.done = save_object_done;
@@ -355,7 +357,8 @@ static void do_load_object(struct work *work)
goto error;
if (sd_write_object(sw->entry.oid, 0, buffer, size, 0, 0,
- sw->entry.nr_copies, true, true) != 0)
+ sw->entry.nr_copies, sw->entry.copy_policy,
+ true, true) != 0)
goto error;
if (is_vdi_obj(sw->entry.oid)) {
diff --git a/dog/farm/farm.h b/dog/farm/farm.h
index 0f457e4..c045845 100644
--- a/dog/farm/farm.h
+++ b/dog/farm/farm.h
@@ -20,7 +20,9 @@
struct trunk_entry {
uint64_t oid;
- int nr_copies;
+ uint8_t nr_copies;
+ uint8_t copy_policy;
+ uint8_t reserved[2];
unsigned char sha1[SHA1_DIGEST_SIZE];
};
@@ -71,11 +73,11 @@ void *sha1_file_read(const unsigned char *sha1, size_t *size);
/* object_tree.c */
int object_tree_size(void);
-void object_tree_insert(uint64_t oid, int nr_copies);
+void object_tree_insert(uint64_t oid, uint32_t nr_copies, uint8_t);
void object_tree_free(void);
void object_tree_print(void);
-int for_each_object_in_tree(int (*func)(uint64_t oid, int nr_copies,
- void *data), void *data);
+int for_each_object_in_tree(int (*func)(uint64_t oid, uint32_t nr_copies,
+ uint8_t, void *data), void *data);
/* slice.c */
int slice_write(void *buf, size_t len, unsigned char *outsha1);
void *slice_read(const unsigned char *sha1, size_t *outsize);
diff --git a/dog/farm/object_tree.c b/dog/farm/object_tree.c
index c624fea..b90b58b 100644
--- a/dog/farm/object_tree.c
+++ b/dog/farm/object_tree.c
@@ -16,7 +16,8 @@
struct object_tree_entry {
uint64_t oid;
- int nr_copies;
+ uint8_t nr_copies;
+ uint8_t copy_policy;
struct rb_node node;
};
@@ -43,7 +44,7 @@ static struct object_tree_entry *do_insert(struct rb_root *root,
return rb_insert(root, new, node, object_tree_cmp);
}
-void object_tree_insert(uint64_t oid, int nr_copies)
+void object_tree_insert(uint64_t oid, uint32_t nr_copies, uint8_t copy_policy)
{
struct rb_root *root = &tree.root;
struct object_tree_entry *p = NULL;
@@ -52,6 +53,8 @@ void object_tree_insert(uint64_t oid, int nr_copies)
cached_entry = xzalloc(sizeof(*cached_entry));
cached_entry->oid = oid;
cached_entry->nr_copies = nr_copies;
+ cached_entry->copy_policy = copy_policy;
+
rb_init_node(&cached_entry->node);
p = do_insert(root, cached_entry);
if (!p) {
@@ -80,14 +83,16 @@ int object_tree_size(void)
return tree.nr_objs;
}
-int for_each_object_in_tree(int (*func)(uint64_t oid, int nr_copies,
- void *data), void *data)
+int for_each_object_in_tree(int (*func)(uint64_t oid, uint32_t nr_copies,
+ uint8_t copy_policy, void *data),
+ void *data)
{
struct object_tree_entry *entry;
int ret = -1;
rb_for_each_entry(entry, &tree.root, node) {
- if (func(entry->oid, entry->nr_copies, data) < 0)
+ if (func(entry->oid, entry->nr_copies, entry->copy_policy,
+ data) < 0)
goto out;
}
ret = 0;
diff --git a/dog/vdi.c b/dog/vdi.c
index 9004b08..db32193 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -467,7 +467,7 @@ static int read_vdi_obj(const char *vdiname, int snapid, const char *tag,
int do_vdi_create(const char *vdiname, int64_t vdi_size,
uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
- int nr_copies, uint8_t copy_policy)
+ uint8_t nr_copies, uint8_t copy_policy)
{
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
@@ -550,7 +550,7 @@ static int vdi_create(int argc, char **argv)
oid = vid_to_data_oid(vid, idx);
ret = sd_write_object(oid, 0, NULL, 0, 0, 0, inode->nr_copies,
- true, true);
+ inode->copy_policy, true, true);
if (ret != SD_RES_SUCCESS) {
ret = EXIT_FAILURE;
goto out;
@@ -558,8 +558,9 @@ static int vdi_create(int argc, char **argv)
inode->data_vdi_id[idx] = vid;
ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
- SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0,
- inode->nr_copies, false, true);
+ SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
+ 0, inode->nr_copies, inode->copy_policy,
+ false, true);
if (ret) {
ret = EXIT_FAILURE;
goto out;
@@ -601,7 +602,8 @@ static int vdi_snapshot(int argc, char **argv)
ret = sd_write_object(vid_to_vdi_oid(vid), 0, vdi_cmd_data.snapshot_tag,
SD_MAX_VDI_TAG_LEN,
offsetof(struct sd_inode, tag),
- 0, inode->nr_copies, false, false);
+ 0, inode->nr_copies, inode->copy_policy,
+ false, false);
if (ret != SD_RES_SUCCESS)
return EXIT_FAILURE;
@@ -674,15 +676,17 @@ static int vdi_clone(int argc, char **argv)
oid = vid_to_data_oid(new_vid, idx);
ret = sd_write_object(oid, 0, buf, size, 0, 0, inode->nr_copies,
- true, true);
+ inode->copy_policy, true, true);
if (ret != SD_RES_SUCCESS) {
ret = EXIT_FAILURE;
goto out;
}
- ret = sd_write_object(vid_to_vdi_oid(new_vid), 0, &new_vid, sizeof(new_vid),
- SD_INODE_HEADER_SIZE + sizeof(new_vid) * idx, 0,
- inode->nr_copies, false, true);
+ ret = sd_write_object(vid_to_vdi_oid(new_vid), 0, &new_vid,
+ sizeof(new_vid),
+ SD_INODE_HEADER_SIZE + sizeof(new_vid) * idx, 0,
+ inode->nr_copies, inode->copy_policy,
+ false, true);
if (ret) {
ret = EXIT_FAILURE;
goto out;
@@ -734,8 +738,10 @@ static int vdi_resize(int argc, char **argv)
}
inode->vdi_size = new_size;
- ret = sd_write_object(vid_to_vdi_oid(vid), 0, inode, SD_INODE_HEADER_SIZE, 0,
- 0, inode->nr_copies, false, true);
+ ret = sd_write_object(vid_to_vdi_oid(vid), 0,
+ inode, SD_INODE_HEADER_SIZE, 0,
+ 0, inode->nr_copies, inode->copy_policy,
+ false, true);
if (ret != SD_RES_SUCCESS) {
sd_err("Failed to update an inode header");
return EXIT_FAILURE;
@@ -1308,7 +1314,8 @@ static int vdi_write(int argc, char **argv)
inode->data_vdi_id[idx] = inode->vdi_id;
oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
ret = sd_write_object(oid, old_oid, buf, len, offset, flags,
- inode->nr_copies, create, false);
+ inode->nr_copies, inode->copy_policy,
+ create, false);
if (ret != SD_RES_SUCCESS) {
sd_err("Failed to write VDI");
ret = EXIT_FAILURE;
@@ -1316,9 +1323,11 @@ static int vdi_write(int argc, char **argv)
}
if (create) {
- ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
- SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
- flags, inode->nr_copies, false, false);
+ ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid,
+ sizeof(vid),
+ SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
+ flags, inode->nr_copies,
+ inode->copy_policy, false, false);
if (ret) {
ret = EXIT_FAILURE;
goto out;
@@ -1787,13 +1796,15 @@ static int restore_obj(struct obj_backup *backup, uint32_t vid,
/* send a copy-on-write request */
ret = sd_write_object(vid_to_data_oid(vid, backup->idx), parent_oid,
backup->data, backup->length, backup->offset,
- 0, parent_inode->nr_copies, true, true);
+ 0, parent_inode->nr_copies,
+ parent_inode->copy_policy, true, true);
if (ret != SD_RES_SUCCESS)
return ret;
return sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
SD_INODE_HEADER_SIZE + sizeof(vid) * backup->idx,
- 0, parent_inode->nr_copies, false, true);
+ 0, parent_inode->nr_copies,
+ parent_inode->copy_policy, false, true);
}
static uint32_t do_restore(const char *vdiname, int snapid, const char *tag)
diff --git a/sheep/gateway.c b/sheep/gateway.c
index d216120..68e5203 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -170,17 +170,23 @@ out:
return reqs;
}
-bool is_erasure_object(uint64_t oid)
+/* Requests from dog might not have vdi registered yet in the vdi state */
+static bool is_erasure_req(struct request *req)
{
- return !is_vdi_obj(oid) && get_vdi_copy_policy(oid_to_vid(oid)) != 0;
+ if (req->rq.obj.copy_policy > 0)
+ return true;
+ return is_erasure_oid(req->rq.obj.oid);
+}
+
+bool is_erasure_oid(uint64_t oid)
+{
+ return !is_vdi_obj(oid) && get_vdi_copy_policy(oid_to_vid(oid)) > 0;
}
/* Prepare request iterator and buffer for each replica */
static struct req_iter *prepare_requests(struct request *req, int *nr)
{
- uint64_t oid = req->rq.obj.oid;
-
- if (is_erasure_object(oid))
+ if (is_erasure_req(req))
return prepare_erasure_requests(req, nr);
else
return prepare_replication_requests(req, nr);
@@ -197,7 +203,7 @@ static void finish_requests(struct request *req, struct req_iter *reqs,
int end = DIV_ROUND_UP(off + len, SD_EC_D_SIZE), i, j;
int nr_stripe = end - start;
- if (!is_erasure_object(oid))
+ if (!is_erasure_oid(oid))
goto out;
sd_debug("start %d, end %d, send %d, off %"PRIu64 ", len %"PRIu32,
@@ -516,7 +522,7 @@ int gateway_read_obj(struct request *req)
if (!bypass_object_cache(req))
return object_cache_handle_request(req);
- if (is_erasure_object(oid))
+ if (is_erasure_oid(oid))
return gateway_forward_request(req);
else
return gateway_replication_read(req);
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index af9ecbf..3ab707a 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -295,7 +295,7 @@ int prealloc(int fd, uint32_t size)
static size_t get_store_objsize(uint64_t oid)
{
- if (is_erasure_object(oid))
+ if (is_erasure_oid(oid))
return SD_EC_OBJECT_SIZE;
return get_objsize(oid);
}
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 3c543dc..634efc6 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -401,7 +401,7 @@ int gateway_read_obj(struct request *req);
int gateway_write_obj(struct request *req);
int gateway_create_and_write_obj(struct request *req);
int gateway_remove_obj(struct request *req);
-bool is_erasure_object(uint64_t oid);
+bool is_erasure_oid(uint64_t oid);
/* backend store */
int peer_read_obj(struct request *req);
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 97e52ea..221905d 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -93,7 +93,8 @@ int get_vdi_copy_policy(uint32_t vid)
sd_unlock(&vdi_state_lock);
if (!entry)
- panic("copy policy for %" PRIx32 " not found", vid);
+ /* If not found, it must be non-erasure object */
+ return 0;
return entry->copy_policy;
}
--
1.7.9.5
More information about the sheepdog
mailing list