[sheepdog] [PATCH 1/4] dog: make consistency check optional
MORITA Kazutaka
morita.kazutaka at gmail.com
Thu Aug 22 06:01:40 CEST 2013
From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
This splits the vdi check into a replica existence check and a replica
consistency check, and makes the consistency check optional. The reasons are:
- The existence check can be done even while a VM is using the
vdi.
- The existence check doesn't need to calculate sha1 values, so it is
much faster than the consistency check.
- The existence check is more important because write requests are
rejected when one of the replicas is missing.
- In most cases, replica inconsistency doesn't cause a problem
because the client should have received EIO when the inconsistency
arose, and the client shouldn't use the inconsistent area
without first updating that area again.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
dog/cluster.c | 9 +++++++--
dog/dog.h | 2 +-
dog/vdi.c | 45 +++++++++++++++++++++++++++++++--------------
3 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/dog/cluster.c b/dog/cluster.c
index 3fd87bd..c73c500 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -20,6 +20,7 @@
static struct sd_option cluster_options[] = {
{'b', "store", true, "specify backend store"},
{'c', "copies", true, "specify the default data redundancy (number of copies)"},
+ {'C', "consistency", false, "check replica consistency"},
{'f', "force", false, "do not prompt for confirmation"},
{ 0, NULL, false, NULL },
@@ -27,6 +28,7 @@ static struct sd_option cluster_options[] = {
static struct cluster_cmd_data {
int copies;
+ bool check_consistency;
bool force;
char name[STORE_LEN];
} cluster_cmd_data;
@@ -482,7 +484,7 @@ static void cluster_check_cb(uint32_t vid, const char *name, const char *tag,
else
printf("fix vdi %s\n", name);
- do_vdi_check(inode);
+ do_vdi_check(inode, cluster_cmd_data.check_consistency);
}
static int cluster_check(int argc, char **argv)
@@ -509,7 +511,7 @@ static struct subcommand cluster_cmd[] = {
cluster_recover, cluster_options},
{"reweight", NULL, "aph", "reweight the cluster", NULL, 0,
cluster_reweight, cluster_options},
- {"check", NULL, "aph", "check and repair cluster", NULL,
+ {"check", NULL, "aphC", "check and repair cluster", NULL,
CMD_NEED_NODELIST, cluster_check, cluster_options},
{NULL,},
};
@@ -536,6 +538,9 @@ static int cluster_parser(int ch, char *opt)
}
cluster_cmd_data.copies = copies;
break;
+ case 'C':
+ cluster_cmd_data.check_consistency = true;
+ break;
case 'f':
cluster_cmd_data.force = true;
break;
diff --git a/dog/dog.h b/dog/dog.h
index 897cf92..226599a 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -82,7 +82,7 @@ void work_queue_wait(struct work_queue *q);
int do_vdi_create(const char *vdiname, int64_t vdi_size,
uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
int nr_copies);
-int do_vdi_check(const struct sd_inode *inode);
+int do_vdi_check(const struct sd_inode *inode, bool check_consistency);
void show_progress(uint64_t done, uint64_t total, bool raw);
extern struct command vdi_command;
diff --git a/dog/vdi.c b/dog/vdi.c
index 3eb03dd..fff6a6b 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -28,6 +28,7 @@ static struct sd_option vdi_options[] = {
{'d', "delete", false, "delete a key"},
{'w', "writeback", false, "use writeback mode"},
{'c', "copies", true, "specify the data redundancy (number of copies)"},
+ {'C', "consistency", false, "check replica consistency"},
{'F', "from", true, "create a differential backup from the snapshot"},
{'f', "force", false, "do operation forcibly"},
{ 0, NULL, false, NULL },
@@ -41,6 +42,7 @@ static struct vdi_cmd_data {
bool delete;
bool prealloc;
int nr_copies;
+ bool check_consistency;
bool writeback;
int from_snapshot_id;
char from_snapshot_tag[SD_MAX_VDI_TAG_LEN];
@@ -1402,6 +1404,7 @@ struct vdi_check_info {
uint64_t total;
uint64_t *done;
int refcnt;
+ bool check_consistency;
struct work_queue *wq;
struct vdi_check_work *base;
struct vdi_check_work vcw[0];
@@ -1444,7 +1447,7 @@ static void vdi_repair_main(struct work *work)
free_vdi_check_info(info);
}
-static void vdi_hash_check_work(struct work *work)
+static void vdi_health_check_work(struct work *work)
{
struct vdi_check_work *vcw = container_of(work, struct vdi_check_work,
work);
@@ -1453,9 +1456,15 @@ static void vdi_hash_check_work(struct work *work)
struct sd_req hdr;
struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
- sd_init_req(&hdr, SD_OP_GET_HASH);
- hdr.obj.oid = info->oid;
- hdr.obj.tgt_epoch = sd_epoch;
+ if (info->check_consistency) {
+ sd_init_req(&hdr, SD_OP_GET_HASH);
+ hdr.obj.oid = info->oid;
+ hdr.obj.tgt_epoch = sd_epoch;
+ } else {
+ sd_init_req(&hdr, SD_OP_READ_PEER);
+ hdr.obj.oid = info->oid;
+ hdr.epoch = sd_epoch;
+ }
ret = dog_exec_req(vcw->vnode->nid.addr, vcw->vnode->nid.port, &hdr,
NULL);
@@ -1465,7 +1474,8 @@ static void vdi_hash_check_work(struct work *work)
switch (rsp->result) {
case SD_RES_SUCCESS:
vcw->object_found = true;
- memcpy(vcw->hash, rsp->hash.digest, sizeof(vcw->hash));
+ if (info->check_consistency)
+ memcpy(vcw->hash, rsp->hash.digest, sizeof(vcw->hash));
uatomic_set(&info->base, vcw);
break;
case SD_RES_NO_OBJ:
@@ -1479,7 +1489,7 @@ static void vdi_hash_check_work(struct work *work)
}
}
-static void vdi_hash_check_main(struct work *work)
+static void vdi_health_check_main(struct work *work)
{
struct vdi_check_work *vcw = container_of(work, struct vdi_check_work,
work);
@@ -1513,7 +1523,8 @@ static void vdi_hash_check_main(struct work *work)
}
static void queue_vdi_check_work(const struct sd_inode *inode, uint64_t oid,
- uint64_t *done, struct work_queue *wq)
+ bool check_consistency, uint64_t *done,
+ struct work_queue *wq)
{
struct vdi_check_info *info;
const struct sd_vnode *tgt_vnodes[SD_MAX_COPIES];
@@ -1523,6 +1534,7 @@ static void queue_vdi_check_work(const struct sd_inode *inode, uint64_t oid,
info->oid = oid;
info->nr_copies = nr_copies;
info->total = inode->vdi_size;
+ info->check_consistency = check_consistency;
info->done = done;
info->wq = wq;
@@ -1530,14 +1542,14 @@ static void queue_vdi_check_work(const struct sd_inode *inode, uint64_t oid,
for (int i = 0; i < nr_copies; i++) {
info->vcw[i].info = info;
info->vcw[i].vnode = tgt_vnodes[i];
- info->vcw[i].work.fn = vdi_hash_check_work;
- info->vcw[i].work.done = vdi_hash_check_main;
+ info->vcw[i].work.fn = vdi_health_check_work;
+ info->vcw[i].work.done = vdi_health_check_main;
info->refcnt++;
queue_work(info->wq, &info->vcw[i].work);
}
}
-int do_vdi_check(const struct sd_inode *inode)
+int do_vdi_check(const struct sd_inode *inode, bool check_consistency)
{
int max_idx;
uint64_t done = 0, oid;
@@ -1551,7 +1563,8 @@ int do_vdi_check(const struct sd_inode *inode)
wq = create_work_queue("vdi check", WQ_DYNAMIC);
- queue_vdi_check_work(inode, vid_to_vdi_oid(inode->vdi_id), NULL, wq);
+ queue_vdi_check_work(inode, vid_to_vdi_oid(inode->vdi_id),
+ check_consistency, NULL, wq);
max_idx = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE);
vdi_show_progress(done, inode->vdi_size);
@@ -1559,7 +1572,8 @@ int do_vdi_check(const struct sd_inode *inode)
vid = inode->data_vdi_id[idx];
if (vid) {
oid = vid_to_data_oid(inode->vdi_id, idx);
- queue_vdi_check_work(inode, oid, &done, wq);
+ queue_vdi_check_work(inode, oid, check_consistency,
+ &done, wq);
} else {
done += SD_DATA_OBJ_SIZE;
vdi_show_progress(done, inode->vdi_size);
@@ -1587,7 +1601,7 @@ static int vdi_check(int argc, char **argv)
return ret;
}
- return do_vdi_check(inode);
+ return do_vdi_check(inode, vdi_cmd_data.check_consistency);
}
/* vdi backup format */
@@ -2082,7 +2096,7 @@ static int vdi_cache(int argc, char **argv)
}
static struct subcommand vdi_cmd[] = {
- {"check", "<vdiname>", "saph", "check and repair image's consistency",
+ {"check", "<vdiname>", "saphC", "check and repair image's consistency",
NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
vdi_check, vdi_options},
{"create", "<vdiname> <size>", "Pcaphrv", "create an image",
@@ -2185,6 +2199,9 @@ static int vdi_parser(int ch, char *opt)
}
vdi_cmd_data.nr_copies = nr_copies;
break;
+ case 'C':
+ vdi_cmd_data.check_consistency = true;
+ break;
case 'F':
vdi_cmd_data.from_snapshot_id = strtol(opt, &p, 10);
if (opt == p) {
--
1.7.9.5
More information about the sheepdog
mailing list