[sheepdog] [PATCH v2 2/2] collie: add a new option --progress to "node recovery" for showing recovery progress

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Thu Aug 1 06:03:09 CEST 2013


This patch adds a new option --progress (or -P) to the node recovery
subcommand. With this option, users can watch the progress of the
recovery process.

Example:
$ sudo collie node recovery --progress
 99.7 % [==============================================>] 7047 / 7068

The denominator (7068 in the above case) is the total number of
objects that must be checked. The numerator (7047 in the above case)
is the number of objects that have already been checked or copied.

Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
v2:
 - make this feature an option of "node recovery", not a new subcommand
 - clean up coding style
 -- rename recovery_progress_unit() -> get_recovery_progress()

 collie/node.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 108 insertions(+), 2 deletions(-)

diff --git a/collie/node.c b/collie/node.c
index 69229f4..a1392b0 100644
--- a/collie/node.c
+++ b/collie/node.c
@@ -13,6 +13,7 @@
 
 static struct node_cmd_data {
 	bool all_nodes;
+	bool recovery_progress;
 } node_cmd_data;
 
 static void cal_total_vdi_size(uint32_t vid, const char *name, const char *tag,
@@ -120,10 +121,111 @@ static int node_info(int argc, char **argv)
 	return EXIT_SUCCESS;
 }
 
+/*
+ * get_recovery_progress(): fetch recovery progress of the node into *prog
+ *
+ * Returned values (tri-state, so the return type must be int, not bool):
+ * -1 ... request failed
+ *  0 ... recovery ended
+ *  1 ... recovery is continuing
+ */
+static int get_recovery_progress(struct recovery_progress *prog)
+{
+	int ret;
+	struct sd_req req;
+	struct sd_rsp *rsp = (struct sd_rsp *)&req;
+
+	sd_init_req(&req, SD_OP_STAT_RECOVERY);
+	req.data_length = sizeof(*prog);
+
+	ret = collie_exec_req(sdhost, sdport, &req, prog);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to execute request\n");
+		ret = -1;
+		goto out;
+	}
+
+	switch (rsp->result) {
+	case SD_RES_SUCCESS:
+		ret = 0;
+		break;
+	case SD_RES_NODE_IN_RECOVERY:
+		ret = 1;
+		break;
+	default:
+		fprintf(stderr, "obtaining recovery progress fail: %s\n",
+			sd_strerror(rsp->result));
+		ret = -1;
+		break;
+	}
+
+out:
+	return ret;
+}
+
+static int node_recovery_progress(void)
+{
+	int status, prev_status = -2;
+
+	/*
+	 * Poll the recovery state once per second until it ends or fails.
+	 * prev_status == -2 means no poll has reported "in recovery" yet.
+	 */
+
+	/*
+	 * ToDos
+	 *
+	 * 1. Calculate size of actually copied objects.
+	 *    Doing this requires non-trivial changes to the recovery
+	 *    process.
+	 *
+	 * 2. Print remaining physical time.
+	 *    Even if it is not so accurate, it is helpful for administrators.
+	 */
+
+	do {
+		struct recovery_progress prog;
+
+		status = get_recovery_progress(&prog);
+		if (status != 1) {
+			if (status == 0 && prev_status != -2)
+				/* not an immediate completion */
+				show_progress(prog.nr_total, prog.nr_total,
+					true);
+
+			break;
+		}
+
+		switch (prog.state) {
+		case RW_PREPARE_LIST:
+			printf("\rpreparing a checked object list...");
+			break;
+		case RW_NOTIFY_COMPLETION:
+			printf("\rnotifying a completion of recovery...");
+			break;
+		case RW_RECOVER_OBJ:
+			show_progress(prog.nr_finished, prog.nr_total, true);
+			break;
+		default:
+			panic("unknown state of recovery progress: %d",
+				prog.state);
+			break;
+		}
+
+		prev_status = status;
+		sleep(1);
+	} while (true);
+
+	return status == -1 ? EXIT_SYSFAIL : EXIT_SUCCESS;
+}
+
 static int node_recovery(int argc, char **argv)
 {
 	int i, ret;
 
+	if (node_cmd_data.recovery_progress)
+		return node_recovery_progress();
+
 	if (!raw_output) {
 		printf("Nodes In Recovery:\n");
 		printf("  Id   Host:Port         V-Nodes       Zone\n");
@@ -313,6 +415,9 @@ static int node_parser(int ch, char *opt)
 	case 'A':
 		node_cmd_data.all_nodes = true;
 		break;
+	case 'P':
+		node_cmd_data.recovery_progress = true;
+		break;
 	}
 
 	return 0;
@@ -320,6 +425,7 @@ static int node_parser(int ch, char *opt)
 
 static struct sd_option node_options[] = {
 	{'A', "all", false, "show md information of all the nodes"},
+	{'P', "progress", false, "show progress of recovery in the node"},
 
 	{ 0, NULL, false, NULL },
 };
@@ -331,8 +437,8 @@ static struct subcommand node_cmd[] = {
 	 SUBCMD_FLAG_NEED_NODELIST, node_list},
 	{"info", NULL, "aprh", "show information about each node", NULL,
 	 SUBCMD_FLAG_NEED_NODELIST, node_info},
-	{"recovery", NULL, "aprh", "show nodes in recovery", NULL,
-	 SUBCMD_FLAG_NEED_NODELIST, node_recovery},
+	{"recovery", NULL, "aphP", "show recovery information of nodes", NULL,
+	 SUBCMD_FLAG_NEED_NODELIST, node_recovery, node_options},
 	{"md", "[disks]", "apAh", "See 'collie node md' for more information",
 	 node_md_cmd, SUBCMD_FLAG_NEED_ARG, node_md, node_options},
 	{NULL,},
-- 
1.7.10.4




More information about the sheepdog mailing list