[sheepdog] [PATCH] dog: fix cluster-wide snapshot inconsistency due to vdi deleted

Ruoyu liangry at ucweb.com
Wed Aug 20 10:53:46 CEST 2014


If a vdi snapshot is deleted before executing 'dog cluster snapshot
save', an error message like the one below is seen.

Failed to read object 807c2b2500000000 No object found
Failed to read inode header

Once a vdi is deleted, cluster snapshot save does not back it up to
farm, so cluster snapshot load cannot restore it to the new cluster.
However, the vdi id is still marked as in use for safety, and vdi list
tries to read all vdis in use. Therefore, dog cannot read the object.

The solution is to save the deleted vdi object too. To achieve this,
a parameter, include_deleted, is added to the function parse_vdi.
Callers pass false by default; only cluster snapshot save passes true.

Functional test 030 is also updated to simulate the scenario.

Reported-by: Valerio Pachera <sirio81 at gmail.com>
Signed-off-by: Ruoyu <liangry at ucweb.com>
---
 dog/cluster.c            |  5 +++--
 dog/common.c             |  9 ++++++---
 dog/dog.h                |  3 ++-
 dog/node.c               |  2 +-
 dog/vdi.c                | 19 +++++++++++--------
 tests/functional/030     | 14 ++++++++++++++
 tests/functional/030.out | 13 +++++++++++++
 7 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/dog/cluster.c b/dog/cluster.c
index 9cf96a8..86188e2 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -419,7 +419,8 @@ static int save_snapshot(int argc, char **argv)
 
 	opt.count = argc - optind;
 	opt.name = argv + optind;
-	if (parse_vdi(fill_object_tree, SD_INODE_SIZE, &opt) != SD_RES_SUCCESS)
+	if (parse_vdi(fill_object_tree, SD_INODE_SIZE,
+			&opt, true) != SD_RES_SUCCESS)
 		goto out;
 
 	if (object_tree_size() == 0) {
@@ -657,7 +658,7 @@ static void cluster_check_cb(uint32_t vid, const char *name, const char *tag,
 
 static int cluster_check(int argc, char **argv)
 {
-	if (parse_vdi(cluster_check_cb, SD_INODE_SIZE, NULL) < 0)
+	if (parse_vdi(cluster_check_cb, SD_INODE_SIZE, NULL, false) < 0)
 		return EXIT_SYSFAIL;
 
 	return EXIT_SUCCESS;
diff --git a/dog/common.c b/dog/common.c
index 2335e14..8cbf44c 100644
--- a/dog/common.c
+++ b/dog/common.c
@@ -128,7 +128,8 @@ int dog_write_object(uint64_t oid, uint64_t cow_oid, void *data,
 
 #define FOR_EACH_VDI(nr, vdis) FOR_EACH_BIT(nr, vdis, SD_NR_VDIS)
 
-int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
+int parse_vdi(vdi_parser_func_t func, size_t size, void *data,
+		bool include_deleted)
 {
 	int ret;
 	unsigned long nr;
@@ -163,8 +164,10 @@ int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
 			continue;
 		}
 
-		if (i->name[0] == '\0') /* this VDI has been deleted */
-			continue;
+		if (!include_deleted) {
+			if (i->name[0] == '\0') /* this VDI has been deleted */
+				continue;
+		}
 
 		if (size > SD_INODE_HEADER_SIZE) {
 			rlen = sd_inode_get_meta_size(i, size);
diff --git a/dog/dog.h b/dog/dog.h
index 9b53b3f..e904ae9 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -72,7 +72,8 @@ typedef void (*vdi_parser_func_t)(uint32_t vid, const char *name,
 				  const char *tag, uint32_t snapid,
 				  uint32_t flags,
 				  const struct sd_inode *i, void *data);
-int parse_vdi(vdi_parser_func_t func, size_t size, void *data);
+int parse_vdi(vdi_parser_func_t func, size_t size, void *data,
+			bool include_deleted);
 int dog_read_object(uint64_t oid, void *data, unsigned int datalen,
 		    uint64_t offset, bool direct);
 int dog_write_object(uint64_t oid, uint64_t cow_oid, void *data,
diff --git a/dog/node.c b/dog/node.c
index 3912496..344fcb8 100644
--- a/dog/node.c
+++ b/dog/node.c
@@ -87,7 +87,7 @@ static int node_info(int argc, char **argv)
 	}
 
 	if (parse_vdi(cal_total_vdi_size, SD_INODE_HEADER_SIZE,
-			&total_vdi_size) < 0)
+			&total_vdi_size, false) < 0)
 		return EXIT_SYSFAIL;
 
 	printf(raw_output ? "Total %s %s %s %d%% %s\n"
diff --git a/dog/vdi.c b/dog/vdi.c
index 84715b3..f5e6da5 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -288,7 +288,7 @@ static int vdi_list(int argc, char **argv)
 		struct get_vdi_info info;
 		memset(&info, 0, sizeof(info));
 		info.name = vdiname;
-		if (parse_vdi(print_vdi_list, SD_INODE_SIZE, &info) < 0)
+		if (parse_vdi(print_vdi_list, SD_INODE_SIZE, &info, false) < 0)
 			return EXIT_SYSFAIL;
 		return EXIT_SUCCESS;
 	}
@@ -297,12 +297,12 @@ static int vdi_list(int argc, char **argv)
 		if (!is_data_obj(vdi_cmd_data.oid))
 			return EXIT_FAILURE;
 		if (parse_vdi(print_obj_ref, SD_INODE_SIZE,
-					&vdi_cmd_data.oid) < 0)
+					&vdi_cmd_data.oid, false) < 0)
 			return EXIT_SYSFAIL;
 		return EXIT_SUCCESS;
 	}
 
-	if (parse_vdi(print_vdi_list, SD_INODE_SIZE, NULL) < 0)
+	if (parse_vdi(print_vdi_list, SD_INODE_SIZE, NULL, false) < 0)
 		return EXIT_SYSFAIL;
 	return EXIT_SUCCESS;
 }
@@ -310,7 +310,7 @@ static int vdi_list(int argc, char **argv)
 static int vdi_tree(int argc, char **argv)
 {
 	init_tree();
-	if (parse_vdi(print_vdi_tree, SD_INODE_HEADER_SIZE, NULL) < 0)
+	if (parse_vdi(print_vdi_tree, SD_INODE_HEADER_SIZE, NULL, false) < 0)
 		return EXIT_SYSFAIL;
 	dump_tree();
 
@@ -324,7 +324,7 @@ static int vdi_graph(int argc, char **argv)
 	printf("  node [shape = \"box\", fontname = \"Courier\"];\n\n");
 	printf("  \"0\" [shape = \"ellipse\", label = \"root\"];\n\n");
 
-	if (parse_vdi(print_vdi_graph, SD_INODE_HEADER_SIZE, NULL) < 0)
+	if (parse_vdi(print_vdi_graph, SD_INODE_HEADER_SIZE, NULL, false) < 0)
 		return EXIT_SYSFAIL;
 
 	/* print a footer */
@@ -2611,7 +2611,8 @@ static bool is_vdi_standalone(uint32_t vid, const char *name)
 	struct vdi_tree *vdi;
 
 	init_tree();
-	if (parse_vdi(construct_vdi_tree, SD_INODE_HEADER_SIZE, NULL) < 0)
+	if (parse_vdi(construct_vdi_tree, SD_INODE_HEADER_SIZE,
+			NULL, false) < 0)
 		return EXIT_SYSFAIL;
 
 	vdi = find_vdi_from_root(vid, name);
@@ -2746,7 +2747,8 @@ retry:
 	}
 
 	init_tree();
-	if (parse_vdi(construct_vdi_tree, SD_INODE_HEADER_SIZE, NULL) < 0)
+	if (parse_vdi(construct_vdi_tree, SD_INODE_HEADER_SIZE,
+			NULL, false) < 0)
 		return EXIT_SYSFAIL;
 
 	count = rsp->data_length / sizeof(*vs);
@@ -2801,7 +2803,8 @@ static int lock_force_unlock(int argc, char **argv)
 	struct vdi_tree *vdi;
 
 	init_tree();
-	if (parse_vdi(construct_vdi_tree, SD_INODE_HEADER_SIZE, NULL) < 0)
+	if (parse_vdi(construct_vdi_tree, SD_INODE_HEADER_SIZE,
+			NULL, false) < 0)
 		return EXIT_SYSFAIL;
 
 	vdi = find_vdi_from_root_by_name(vdiname);
diff --git a/tests/functional/030 b/tests/functional/030
index 90800b5..dfa3cf2 100755
--- a/tests/functional/030
+++ b/tests/functional/030
@@ -140,3 +140,17 @@ for i in 0 4 8; do
 	$DOG vdi read test$i | md5sum > $STORE/csum.${i}3.new
 	diff -u $STORE/csum.${i}3.org $STORE/csum.${i}3.new
 done
+
+
+# delete vdi snapshot before saving cluster snapshot
+
+echo "yes" | _cluster_format
+$DOG vdi create test 10M
+_random | $DOG vdi write test
+$DOG vdi snapshot -s vs1 test
+$DOG cluster snapshot save s4 $TMPDIR
+$DOG vdi delete -s 1 test
+$DOG vdi snapshot -s vs2 test
+$DOG cluster snapshot save s5 $TMPDIR
+echo "yes" | $DOG cluster snapshot load s5 $TMPDIR
+_vdi_list
diff --git a/tests/functional/030.out b/tests/functional/030.out
index e54237e..29063d2 100644
--- a/tests/functional/030.out
+++ b/tests/functional/030.out
@@ -99,3 +99,16 @@ s test0        1   10 MB   12 MB  0.0 MB DATE   fd34af      3          snap
   test0        0   10 MB  0.0 MB   12 MB DATE   fd34b0      3              
 s test8        1   10 MB   12 MB  0.0 MB DATE   fd4247      3          snap
   test8        0   10 MB  0.0 MB   12 MB DATE   fd4248      3              
+    __
+   ()'`;
+   /\|`
+  /  |   Caution! The cluster is not empty.
+(/_)_|_  Are you sure you want to continue? [yes/no]: using backend plain store
+    __
+   ()'`;
+   /\|`
+  /  |   Caution! The cluster is not empty.
+(/_)_|_  Are you sure you want to continue? [yes/no]: using backend plain store
+  Name        Id    Size    Used  Shared    Creation time   VDI id  Copies  Tag
+s test         2   10 MB  0.0 MB   12 MB DATE   7c2b26      3           vs2
+  test         0   10 MB  0.0 MB   12 MB DATE   7c2b27      3              
-- 
1.8.3.2





More information about the sheepdog mailing list