[sheepdog] [PATCH RFC 2/2] collie: add a new subcommand "recovery-progress" to node

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Mon Jul 29 09:39:27 CEST 2013


This patch adds a new subcommand, recovery-progress, to the node command.
With this subcommand, users can view the progress of the recovery process.

$ sudo collie node recovery-progress
 99.7 % [==============================================>] 7047 / 7068
recovery process ends

The denominator (7068 in the above example) is the total number of
objects that must be checked. The numerator (7047 in the above example)
is the number of objects that have already been checked or copied.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 collie/node.c |   82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/collie/node.c b/collie/node.c
index 0cd7e7a..2019c3e 100644
--- a/collie/node.c
+++ b/collie/node.c
@@ -120,6 +120,84 @@ static int node_info(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+/*
+ * recovery_progress_unit() - query one node for its recovery progress.
+ *
+ * Sends SD_OP_STAT_RECOVERY to sdhost:sdport with prog as the data buffer.
+ * Returns false only for SD_RES_NODE_IN_RECOVERY, true otherwise.
+ */
+static bool recovery_progress_unit(struct recovery_progress *prog)
+{
+	int ret;
+	bool res = false;
+	struct sd_req req;
+
+	sd_init_req(&req, SD_OP_STAT_RECOVERY);
+	req.data_length = sizeof(*prog);
+
+	ret = collie_exec_req(sdhost, sdport, &req, prog);
+	switch (ret) {
+	case SD_RES_SUCCESS:
+		res = true;
+		break;
+	case SD_RES_NODE_IN_RECOVERY:
+		break; /* res stays false: caller keeps polling */
+	default:
+		fprintf(stderr, "obtaining recovery progress fail: %s\n",
+			sd_strerror(ret));
+		res = true; /* failure is reported like completion */
+		break;
+	}
+
+	return res;
+}
+
+static int node_recovery_progress(int argc, char **argv)
+{
+	struct recovery_progress prog;
+	bool end;
+
+	/*
+	 * Probe once; if the probe returns true, report that the node is not
+	 * recovering.  Otherwise poll every second until it returns true.
+	 * NOTE(review): carriage-return messages are not flushed here - confirm.
+	 *
+	 * ToDos
+	 * 1. Calculate the size of actually copied objects (needs
+	 *    non-trivial changes to the recovery process).
+	 * 2. Print the remaining time; even a rough estimate helps admins.
+	 */
+	end = recovery_progress_unit(&prog);
+	if (end) {
+		printf("node %s:%d isn't doing recovery\n", sdhost, sdport);
+		return EXIT_SUCCESS;
+	}
+
+	do {
+		end = recovery_progress_unit(&prog);
+		if (end)
+			break;
+
+		switch (prog.state) { /* no default: other states print nothing */
+		case RW_PREPARE_LIST:
+			printf("\rpreparing a checked object list...");
+			break;
+		case RW_NOTIFY_COMPLETION:
+			printf("\rnotifying a completion of recovery...");
+			break;
+		case RW_RECOVER_OBJ:
+			show_progress(prog.nr_recovered_objects,
+				prog.nr_entire_checked_objects, true);
+			break;
+		}
+
+		sleep(1); /* poll interval between requests */
+	} while (true);
+
+	printf("recovery process ends\n");
+	return EXIT_SUCCESS;
+}
+
 static int node_recovery(int argc, char **argv)
 {
 	int i, ret;
@@ -327,7 +405,9 @@ static struct subcommand node_cmd[] = {
 	 SUBCMD_FLAG_NEED_NODELIST, node_list},
 	{"info", NULL, "aprh", "show information about each node", NULL,
 	 SUBCMD_FLAG_NEED_NODELIST, node_info},
-	{"recovery", NULL, "aprh", "show nodes in recovery", NULL,
+	{"recovery-progress", NULL, "aprh", "show recovery progress of node",
+	 NULL, 0, node_recovery_progress},
+	{"recovery", NULL, "aph", "show nodes in recovery", NULL,
 	 SUBCMD_FLAG_NEED_NODELIST, node_recovery},
 	{"md", "[disks]", "apAh", "See 'collie node md' for more information",
 	 node_md_cmd, SUBCMD_FLAG_NEED_ARG, node_md, node_options},
-- 
1.7.10.4




More information about the sheepdog mailing list