[Sheepdog] [PATCH v3 12/12] collie: enable cluster-wide snapshot command
Liu Yuan
namei.unix at gmail.com
Fri Dec 23 15:39:30 CET 2011
From: Liu Yuan <tailai.ly at taobao.com>
Usage:
$collie cluster snapshot # snapshot the whole cluster
$collie cluster snapshot -l # list user snapshot info
$collie cluster snapshot -R epoch # restore the cluster to the state of the targeted snapshot
For example, running the script below:
#!/bin/bash
pkill sheep
rm store/* -rf
for i in 0 1 2; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i;sleep 1;done
collie/collie cluster format
qemu-img create -f raw sheepdog:test 1G
qemu-io -c "write -P 0x1 0 4M" sheepdog:test
collie/collie cluster snapshot # Index 1
qemu-io -c "write -P 0x2 4M 4M" sheepdog:test
collie/collie cluster snapshot # 2
qemu-io -c "write -P 0x3 8M 4M" sheepdog:test
collie/collie cluster snapshot # 3
collie/collie cluster snapshot -l
collie/collie cluster snapshot -R 2
============================================================
OUTPUT:
Formatting 'sheepdog:test', fmt=raw size=1073741824
wrote 4194304/4194304 bytes at offset 0
4 MiB, 1 ops; 0.0000 sec (8.142 MiB/sec and 2.0354 ops/sec)
wrote 4194304/4194304 bytes at offset 4194304
4 MiB, 1 ops; 0.0000 sec (7.987 MiB/sec and 1.9968 ops/sec)
wrote 4194304/4194304 bytes at offset 8388608
4 MiB, 1 ops; 0.0000 sec (9.381 MiB/sec and 2.3452 ops/sec)
Index Snapshot Time
1 Fri Dec 23 22:21:05 2011
2 Fri Dec 23 22:21:08 2011
3 Fri Dec 23 22:21:11 2011
Cluster restore to the snapshot 2
...
Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
---
collie/cluster.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++
collie/collie.c | 2 +
include/sheepdog_proto.h | 1 +
sheep/farm/farm.c | 2 +-
sheep/ops.c | 17 +++--
sheep/sheep_priv.h | 2 -
6 files changed, 172 insertions(+), 11 deletions(-)
diff --git a/collie/cluster.c b/collie/cluster.c
index 6fbda6b..3149a3a 100644
--- a/collie/cluster.c
+++ b/collie/cluster.c
@@ -15,11 +15,14 @@
#include <sys/time.h>
#include "collie.h"
+#include "../sheep/farm.h"
struct cluster_cmd_data {
int copies;
int nohalt;
int force;
+ int epoch;
+ int list;
} cluster_cmd_data;
static void set_nohalt(uint16_t *p)
@@ -177,6 +180,146 @@ static int cluster_shutdown(int argc, char **argv)
return EXIT_SUCCESS;
}
+/* Send SD_OP_RESTORE for snapshot @epoch to the node at sdhost:sdport. */
+static int restore_snap(int epoch)
+{
+	struct sd_obj_req req;
+	/* reply aliases req: the result is read from the same header storage */
+	struct sd_obj_rsp *reply = (struct sd_obj_rsp *)&req;
+	unsigned rd_len = 0, wr_len = 0;
+	int sock, err;
+
+	sock = connect_to(sdhost, sdport);
+	if (sock < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&req, 0, sizeof(req));
+	req.opcode = SD_OP_RESTORE;
+	req.tgt_epoch = epoch;
+
+	/* Both lengths are zero and the data buffer is NULL: header only. */
+	err = exec_req(sock, (struct sd_req *)&req, NULL, &wr_len, &rd_len);
+	close(sock);
+
+	if (err) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (reply->result == SD_RES_SUCCESS) {
+		printf("Cluster restore to the snapshot %d\n", epoch);
+		return EXIT_SUCCESS;
+	}
+
+	fprintf(stderr, "Restore failed: %s\n",
+		sd_strerror(reply->result));
+	return EXIT_FAILURE;
+}
+
+/* Print one "index, time" row per struct snap_log record in buf (len bytes). */
+static void print_list(void *buf, unsigned len)
+{
+	struct snap_log *log_buf = buf;
+	unsigned nr = len / sizeof(*log_buf), i;
+
+	printf("Index\t\tSnapshot Time\n");
+	for (i = 0; i < nr; i++, log_buf++) {
+		time_t t = log_buf->time; /* convert by value; don't alias the field */
+		printf("%d\t\t%s", log_buf->epoch, ctime(&t));
+	}
+}
+
+/* Request SD_OP_SNAP_FILE from the node at sdhost:sdport and print the reply. */
+static int list_snap(void)
+{
+	int fd, ret = EXIT_SYSFAIL;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+	void *buf;
+
+	buf = malloc(SD_DATA_OBJ_SIZE);
+	if (!buf)
+		return EXIT_SYSFAIL;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		goto out;
+
+	memset(&hdr, 0, sizeof(hdr));
+	wlen = 0;
+	rlen = SD_DATA_OBJ_SIZE;
+	hdr.opcode = SD_OP_SNAP_FILE;
+	hdr.data_length = rlen;
+
+	ret = exec_req(fd, &hdr, buf, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		ret = EXIT_SYSFAIL;	/* don't leak exec_req's raw error value */
+		goto out;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Listing snapshots failed: %s\n",
+			sd_strerror(rsp->result));
+		ret = EXIT_FAILURE;
+		goto out;
+	}
+
+	print_list(buf, rlen);
+	ret = EXIT_SUCCESS;
+out:
+	free(buf);
+	return ret;	/* propagate failures instead of always reporting success */
+}
+
+/* Send SD_OP_SNAPSHOT (header only, no payload) to the node at sdhost:sdport. */
+static int do_snapshot(void)
+{
+	int fd, ret;
+	struct sd_req hdr;
+	struct sd_rsp *rsp = (struct sd_rsp *)&hdr;
+	unsigned rlen, wlen;
+
+	fd = connect_to(sdhost, sdport);
+	if (fd < 0)
+		return EXIT_SYSFAIL;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.opcode = SD_OP_SNAPSHOT;
+
+	rlen = 0;
+	wlen = 0;
+	ret = exec_req(fd, &hdr, NULL, &wlen, &rlen);
+	close(fd);
+
+	if (ret) {
+		fprintf(stderr, "Failed to connect\n");
+		return EXIT_SYSFAIL;
+	}
+
+	if (rsp->result != SD_RES_SUCCESS) {
+		fprintf(stderr, "Snapshot failed: %s\n",
+			sd_strerror(rsp->result));
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
+
+static int cluster_snapshot(int argc, char **argv)
+{
+	const int target = cluster_cmd_data.epoch;
+
+	/* A non-zero epoch wins over the list flag; with neither, snapshot. */
+	if (target)
+		return restore_snap(target);
+	if (cluster_cmd_data.list)
+		return list_snap();
+	return do_snapshot();
+}
+
#define RECOVER_PRINT \
"Caution! Please try starting all the cluster nodes normally before\n\
running this command.\n\n\
@@ -243,6 +386,8 @@ static struct subcommand cluster_cmd[] = {
SUBCMD_FLAG_NEED_NODELIST, cluster_shutdown},
{"recover", NULL, "afph", "manually recover the cluster",
0, cluster_recover},
+ {"snapshot", NULL, "aRlph", "snapshot/restore the cluster",
+ 0, cluster_snapshot},
{NULL,},
};
@@ -270,6 +415,20 @@ static int cluster_parser(int ch, char *opt)
case 'f':
cluster_cmd_data.force = 1;
break;
+ case 'R':
+ cluster_cmd_data.epoch = strtol(opt, &p, 10);
+ if (opt == p) {
+ fprintf(stderr, "The epoch must be an integer\n");
+ exit(EXIT_FAILURE);
+ }
+ if (cluster_cmd_data.epoch < 1) {
+ fprintf(stderr, "The epoch must be greater than 0\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
+ case 'l':
+ cluster_cmd_data.list = 1;
+ break;
}
return 0;
diff --git a/collie/collie.c b/collie/collie.c
index 19cc9a9..b3eb0b1 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -44,6 +44,8 @@ static const struct sd_option collie_options[] = {
{'H', "nohalt", 0, "serve IO requests even if there are too few\n\
nodes for the configured redundancy"},
{'f', "force", 0, "do not prompt for confirmation"},
+ {'R', "restore", 1, "restore the cluster"},
+ {'l', "list", 0, "list the user epoch information"},
{ 0, NULL, 0, NULL },
};
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index b664223..8789095 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -22,6 +22,7 @@
#define SD_OP_CREATE_AND_WRITE_OBJ 0x01
#define SD_OP_READ_OBJ 0x02
#define SD_OP_WRITE_OBJ 0x03
+#define SD_OP_REMOVE_OBJ 0x04
#define SD_OP_NEW_VDI 0x11
#define SD_OP_LOCK_VDI 0x12
diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
index c71eaf2..db7f070 100644
--- a/sheep/farm/farm.c
+++ b/sheep/farm/farm.c
@@ -528,10 +528,10 @@ static int farm_get_snap_file(struct siocb *iocb)
size_t size;
int nr;
- dprintf("try get snap file\n");
buffer = snap_log_read(&nr, 1);
if (!buffer)
goto out;
+ dprintf("get snap file, nr %d\n", nr);
size = nr * sizeof(struct snap_log);
memcpy(iocb->buf, buffer, size);
iocb->length = size;
diff --git a/sheep/ops.c b/sheep/ops.c
index 5300039..46d2445 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -400,9 +400,9 @@ static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
- const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
+ const struct sd_obj_req *hdr = (const struct sd_obj_req *)req;
int ret = SD_RES_SUCCESS;
- struct siocb iocb = { .epoch = hdr->epoch };
+ struct siocb iocb = { .epoch = hdr->tgt_epoch };
if (store.restore)
ret = store.restore(&iocb);
@@ -410,7 +410,7 @@ static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
return ret;
}
-static int cluster_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
+static int local_get_snap_file(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
int ret = SD_RES_SUCCESS;
@@ -485,11 +485,6 @@ static struct sd_op_template sd_ops[] = {
.force = 1,
.process_main = cluster_restore,
},
- [SD_OP_SNAP_FILE] = {
- .type = SD_OP_TYPE_CLUSTER,
- .force = 1,
- .process_main = cluster_get_snap_file,
- },
/* local operations */
[SD_OP_READ_VDIS] = {
@@ -531,6 +526,12 @@ static struct sd_op_template sd_ops[] = {
.process_work = local_get_epoch,
},
+ [SD_OP_SNAP_FILE] = {
+ .type = SD_OP_TYPE_LOCAL,
+ .force = 1,
+ .process_work = local_get_snap_file,
+ },
+
/* I/O operations */
[SD_OP_CREATE_AND_WRITE_OBJ] = {
.type = SD_OP_TYPE_IO,
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 4110881..04cd4df 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -21,8 +21,6 @@
#include "sheep.h"
#include "cluster.h"
-#define SD_OP_REMOVE_OBJ 0x91
-
#define SD_OP_GET_OBJ_LIST 0xA1
#define SD_OP_GET_EPOCH 0XA2
--
1.7.8.rc3
More information about the sheepdog
mailing list