[sheepdog] [PATCH 4/4] dog: make repairing vdi optional

MORITA Kazutaka morita.kazutaka at gmail.com
Thu Aug 22 06:01:43 CEST 2013


From: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>

Basically, sheepdog doesn't allow multiple clients to use the same
vdi, so consistency repair is not safe while a VM may be using the
image.  This patch makes automatic repair optional (-A, --auto) to
reduce the risk of corrupting the image, and shows a confirmation
message before repairing when the option is not specified.

Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
 dog/cluster.c |   10 +++++++--
 dog/dog.h     |    3 ++-
 dog/vdi.c     |   65 ++++++++++++++++++++++++++++++++++++++++++++-------------
 3 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/dog/cluster.c b/dog/cluster.c
index ccceb3a..19d365f 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -18,6 +18,7 @@
 #include "farm/farm.h"
 
 static struct sd_option cluster_options[] = {
+	{'A', "auto", false, "repair the vdi without asking questions"},
 	{'b', "store", true, "specify backend store"},
 	{'c', "copies", true, "specify the default data redundancy (number of copies)"},
 	{'C', "consistency", false, "check replica consistency"},
@@ -27,6 +28,7 @@ static struct sd_option cluster_options[] = {
 };
 
 static struct cluster_cmd_data {
+	bool auto_repair;
 	int copies;
 	bool check_consistency;
 	bool force;
@@ -486,7 +488,8 @@ static void cluster_check_cb(uint32_t vid, const char *name, const char *tag,
 	else
 		printf("fix vdi %s\n", name);
 
-	do_vdi_check(inode, cluster_cmd_data.check_consistency);
+	do_vdi_check(inode, cluster_cmd_data.check_consistency,
+		     cluster_cmd_data.auto_repair);
 }
 
 static int cluster_check(int argc, char **argv)
@@ -513,7 +516,7 @@ static struct subcommand cluster_cmd[] = {
 	 cluster_recover, cluster_options},
 	{"reweight", NULL, "aph", "reweight the cluster", NULL, 0,
 	 cluster_reweight, cluster_options},
-	{"check", NULL, "aphC", "check and repair cluster", NULL,
+	{"check", NULL, "aphAC", "check and repair cluster", NULL,
 	 CMD_NEED_NODELIST, cluster_check, cluster_options},
 	{NULL,},
 };
@@ -524,6 +527,9 @@ static int cluster_parser(int ch, char *opt)
 	char *p;
 
 	switch (ch) {
+	case 'A':
+		cluster_cmd_data.auto_repair = true;
+		break;
 	case 'b':
 		pstrcpy(cluster_cmd_data.name, sizeof(cluster_cmd_data.name),
 			opt);
diff --git a/dog/dog.h b/dog/dog.h
index 0b1166c..552267d 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -82,7 +82,8 @@ void work_queue_wait(struct work_queue *q);
 int do_vdi_create(const char *vdiname, int64_t vdi_size,
 		  uint32_t base_vid, uint32_t *vdi_id, bool snapshot,
 		  int nr_copies);
-int do_vdi_check(const struct sd_inode *inode, bool check_consistency);
+int do_vdi_check(const struct sd_inode *inode, bool check_consistency,
+		 bool auto_repair);
 void show_progress(uint64_t done, uint64_t total, bool raw);
 
 extern struct command vdi_command;
diff --git a/dog/vdi.c b/dog/vdi.c
index a8cfdf4..62b1025 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -21,6 +21,7 @@
 #include "sha1.h"
 
 static struct sd_option vdi_options[] = {
+	{'A', "auto", false, "repair the vdi without asking questions"},
 	{'P', "prealloc", false, "preallocate all the data objects"},
 	{'i', "index", true, "specify the index of data objects"},
 	{'s', "snapshot", true, "specify a snapshot id or tag name"},
@@ -35,6 +36,7 @@ static struct sd_option vdi_options[] = {
 };
 
 static struct vdi_cmd_data {
+	bool auto_repair;
 	unsigned int index;
 	int snapshot_id;
 	char snapshot_tag[SD_MAX_VDI_TAG_LEN];
@@ -1408,6 +1410,7 @@ struct vdi_check_info {
 	uint64_t *done;
 	int refcnt;
 	bool check_consistency;
+	bool auto_repair;
 	struct work_queue *wq;
 	struct vdi_check_work *base;
 	struct vdi_check_work vcw[0];
@@ -1492,6 +1495,30 @@ static void vdi_health_check_work(struct work *work)
 	}
 }
 
+static bool vdi_repair_confirm(struct vdi_check_work *vcw)
+{
+	struct vdi_check_info *info = vcw->info;
+
+	if (info->auto_repair)
+		return true;
+
+	if (!vcw->object_found)
+		return confirm("Object %" PRIx64 " is not found on %s.  "
+			       "Fix? [yes/no]: ", info->oid,
+			       addr_to_str(vcw->vnode->nid.addr,
+					   vcw->vnode->nid.port));
+
+	if (memcmp(info->base->hash, vcw->hash, sizeof(vcw->hash)) != 0)
+		return confirm("Checksum of object %" PRIx64 " is different "
+			       "between %s and %s.  Fix? [yes/no]: ", info->oid,
+			       addr_to_str(info->base->vnode->nid.addr,
+					   info->base->vnode->nid.port),
+			       addr_to_str(vcw->vnode->nid.addr,
+					   vcw->vnode->nid.port));
+
+	panic("shouldn't reach here");
+}
+
 static void vdi_health_check_main(struct work *work)
 {
 	struct vdi_check_work *vcw = container_of(work, struct vdi_check_work,
@@ -1511,14 +1538,18 @@ static void vdi_health_check_main(struct work *work)
 		if (&info->vcw[i] == info->base)
 			continue;
 		/* need repair when object not found or consistency broken */
-		if (!info->vcw[i].object_found ||
+		if (info->vcw[i].object_found &&
 		    memcmp(info->base->hash, info->vcw[i].hash,
-			   sizeof(info->base->hash)) != 0) {
-			info->vcw[i].work.fn = vdi_repair_work;
-			info->vcw[i].work.done = vdi_repair_main;
-			info->refcnt++;
-			queue_work(info->wq, &info->vcw[i].work);
-		}
+			   sizeof(info->base->hash)) == 0)
+			continue;
+
+		if (!vdi_repair_confirm(&info->vcw[i]))
+			continue;
+
+		info->vcw[i].work.fn = vdi_repair_work;
+		info->vcw[i].work.done = vdi_repair_main;
+		info->refcnt++;
+		queue_work(info->wq, &info->vcw[i].work);
 	}
 
 	if (info->refcnt == 0)
@@ -1526,8 +1557,8 @@ static void vdi_health_check_main(struct work *work)
 }
 
 static void queue_vdi_check_work(const struct sd_inode *inode, uint64_t oid,
-				 bool check_consistency, uint64_t *done,
-				 struct work_queue *wq)
+				 bool check_consistency, bool auto_repair,
+				 uint64_t *done, struct work_queue *wq)
 {
 	struct vdi_check_info *info;
 	const struct sd_vnode *tgt_vnodes[SD_MAX_COPIES];
@@ -1538,6 +1569,7 @@ static void queue_vdi_check_work(const struct sd_inode *inode, uint64_t oid,
 	info->nr_copies = nr_copies;
 	info->total = inode->vdi_size;
 	info->check_consistency = check_consistency;
+	info->auto_repair = auto_repair;
 	info->done = done;
 	info->wq = wq;
 
@@ -1552,7 +1584,8 @@ static void queue_vdi_check_work(const struct sd_inode *inode, uint64_t oid,
 	}
 }
 
-int do_vdi_check(const struct sd_inode *inode, bool check_consistency)
+int do_vdi_check(const struct sd_inode *inode, bool check_consistency,
+		 bool auto_repair)
 {
 	int max_idx;
 	uint64_t done = 0, oid;
@@ -1567,7 +1600,7 @@ int do_vdi_check(const struct sd_inode *inode, bool check_consistency)
 	wq = create_work_queue("vdi check", WQ_DYNAMIC);
 
 	queue_vdi_check_work(inode, vid_to_vdi_oid(inode->vdi_id),
-			     check_consistency, NULL, wq);
+			     check_consistency, auto_repair, NULL, wq);
 
 	max_idx = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE);
 	vdi_show_progress(done, inode->vdi_size);
@@ -1576,7 +1609,7 @@ int do_vdi_check(const struct sd_inode *inode, bool check_consistency)
 		if (vid) {
 			oid = vid_to_data_oid(vid, idx);
 			queue_vdi_check_work(inode, oid, check_consistency,
-					     &done, wq);
+					     auto_repair, &done, wq);
 		} else {
 			done += SD_DATA_OBJ_SIZE;
 			vdi_show_progress(done, inode->vdi_size);
@@ -1604,7 +1637,8 @@ static int vdi_check(int argc, char **argv)
 		return ret;
 	}
 
-	return do_vdi_check(inode, vdi_cmd_data.check_consistency);
+	return do_vdi_check(inode, vdi_cmd_data.check_consistency,
+			    vdi_cmd_data.auto_repair);
 }
 
 /* vdi backup format */
@@ -2100,7 +2134,7 @@ static int vdi_cache(int argc, char **argv)
 }
 
 static struct subcommand vdi_cmd[] = {
-	{"check", "<vdiname>", "saphC", "check and repair image's consistency",
+	{"check", "<vdiname>", "saphAC", "check and repair image's consistency",
 	 NULL, CMD_NEED_NODELIST|CMD_NEED_ARG,
 	 vdi_check, vdi_options},
 	{"create", "<vdiname> <size>", "Pcaphrv", "create an image",
@@ -2163,6 +2197,9 @@ static int vdi_parser(int ch, char *opt)
 	int nr_copies;
 
 	switch (ch) {
+	case 'A':
+		vdi_cmd_data.auto_repair = true;
+		break;
 	case 'P':
 		vdi_cmd_data.prealloc = true;
 		break;
-- 
1.7.9.5




More information about the sheepdog mailing list