[sheepdog] [PATCH 2/2] sheep: remove stale objects after recovery fully completed
MORITA Kazutaka
morita.kazutaka at lab.ntt.co.jp
Sun Aug 26 09:54:42 CEST 2012
This patch sends SD_OP_COMPLETE_RECOVERY to all nodes when object
recovery finishes. A sheep removes its stale objects once it has
received SD_OP_COMPLETE_RECOVERY from all nodes.
Signed-off-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
---
include/internal_proto.h | 1 +
sheep/ops.c | 54 +++++++++++++++++++++++++++++++++++++++++++--
sheep/plain_store.c | 2 +-
sheep/recovery.c | 30 ++++++++++++++++++++++++-
sheep/sheep_priv.h | 4 +-
5 files changed, 84 insertions(+), 7 deletions(-)
diff --git a/include/internal_proto.h b/include/internal_proto.h
index 3d70ba9..c1d116a 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -64,6 +64,7 @@
#define SD_OP_DISABLE_RECOVER 0xA9
#define SD_OP_INFO_RECOVER 0xAA
#define SD_OP_GET_VDI_COPIES 0xAB
+#define SD_OP_COMPLETE_RECOVERY 0xAC
/* internal flags for hdr.flags, must be above 0x80 */
#define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/ops.c b/sheep/ops.c
index c6a4f3b..499c773 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -536,8 +536,6 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
int ret;
- struct siocb iocb = { 0 };
- iocb.epoch = sys->epoch;
if (node_in_recovery())
return SD_RES_NODE_IN_RECOVERY;
@@ -546,7 +544,7 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp,
return SD_RES_SUCCESS;
if (sd_store->cleanup)
- ret = sd_store->cleanup(&iocb);
+ ret = sd_store->cleanup();
else
ret = SD_RES_NO_SUPPORT;
@@ -561,6 +559,49 @@ static int cluster_notify_vdi_del(const struct sd_req *req, struct sd_rsp *rsp,
return objlist_cache_cleanup(vid);
}
+/*
+ * Handle SD_OP_COMPLETE_RECOVERY.
+ *
+ * @req:  request header; req->epoch is the epoch the sender finished
+ *        recovering.
+ * @rsp:  unused.
+ * @data: the struct sd_node of the node that finished recovery.
+ *
+ * The nodes that reported completion for the newest epoch seen so far are
+ * kept, sorted with node_id_cmp(), in function-static storage.  Once that
+ * list equals the node list returned by get_vnode_info() and the newest
+ * reported epoch equals sys->epoch, sd_store->cleanup() is called (when the
+ * store driver provides it).
+ *
+ * Always returns SD_RES_SUCCESS.
+ */
+static int cluster_recovery_completion(const struct sd_req *req,
+				       struct sd_rsp *rsp,
+				       void *data)
+{
+	static struct sd_node recovereds[SD_MAX_NODES];
+	static size_t nr_recovereds;
+	static uint32_t latest_epoch;
+	struct sd_node *node = data;
+	struct vnode_info *vnode_info;
+	size_t i;
+
+	/*
+	 * A report for an epoch older than the one being tracked must not be
+	 * counted as a completion of the tracked epoch.
+	 */
+	if (req->epoch < latest_epoch)
+		return SD_RES_SUCCESS;
+
+	if (latest_epoch < req->epoch) {
+		dprintf("new epoch %d\n", req->epoch);
+		latest_epoch = req->epoch;
+		nr_recovereds = 0;
+	}
+
+	/*
+	 * A node that reports twice for the same epoch is recorded only once;
+	 * a duplicate entry would make the comparison below fail for good.
+	 */
+	for (i = 0; i < nr_recovereds; i++)
+		if (node_id_cmp(node, recovereds + i) == 0)
+			return SD_RES_SUCCESS;
+
+	/* never write past the end of the static array */
+	if (nr_recovereds >= SD_MAX_NODES) {
+		eprintf("too many recovered nodes at epoch %d\n", req->epoch);
+		return SD_RES_SUCCESS;
+	}
+
+	recovereds[nr_recovereds++] = *node;
+	/* keep the list sorted so it can be compared with memcmp() below */
+	qsort(recovereds, nr_recovereds, sizeof(*recovereds), node_id_cmp);
+
+	dprintf("%s is recovered at epoch %d\n", node_to_str(node), req->epoch);
+	for (i = 0; i < nr_recovereds; i++)
+		dprintf("[%zx] %s\n", i, node_to_str(recovereds + i));
+
+	/* the report is for an epoch this node has not reached yet */
+	if (sys->epoch != latest_epoch)
+		return SD_RES_SUCCESS;
+
+	vnode_info = get_vnode_info();
+
+	if (vnode_info->nr_nodes == nr_recovereds &&
+	    memcmp(vnode_info->nodes, recovereds,
+		   sizeof(*recovereds) * nr_recovereds) == 0) {
+		dprintf("all nodes are recovered at epoch %d\n", req->epoch);
+		if (sd_store->cleanup)
+			sd_store->cleanup();
+	}
+
+	put_vnode_info(vnode_info);
+
+	return SD_RES_SUCCESS;
+}
+
static int local_set_cache_size(const struct sd_req *req, struct sd_rsp *rsp,
void *data)
{
@@ -947,6 +988,13 @@ static struct sd_op_template sd_ops[] = {
.process_main = cluster_notify_vdi_del,
},
+ [SD_OP_COMPLETE_RECOVERY] = {
+ .name = "COMPLETE_RECOVERY",
+ .type = SD_OP_TYPE_CLUSTER,
+ .force = 1,
+ .process_main = cluster_recovery_completion,
+ },
+
/* local operations */
[SD_OP_GET_STORE_LIST] = {
.name = "GET_STORE_LIST",
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index 8028f7d..8888521 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -138,7 +138,7 @@ out:
return ret;
}
-int default_cleanup(struct siocb *iocb)
+int default_cleanup(void)
{
rmdir_r(stale_dir);
if (mkdir(stale_dir, 0755) < 0) {
diff --git a/sheep/recovery.c b/sheep/recovery.c
index 72c90cd..dec7261 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -337,6 +337,31 @@ static inline void run_next_rw(struct recovery_work *rw)
dprintf("recovery work is superseded\n");
}
+/*
+ * Work function that finish_recovery() queues on sys->recovery_wqueue.
+ *
+ * Builds an SD_OP_COMPLETE_RECOVERY request carrying the epoch of the
+ * finished recovery work, with sys->this_node as its payload, and submits
+ * it through exec_local_req().  A failure is only logged.
+ */
+static void notify_recovery_completion_work(struct work *work)
+{
+	struct recovery_work *finished;
+	struct sd_req req;
+
+	finished = container_of(work, struct recovery_work, work);
+
+	sd_init_req(&req, SD_OP_COMPLETE_RECOVERY);
+	req.data_length = sizeof(sys->this_node);
+	req.flags = SD_FLAG_CMD_WRITE;
+	req.epoch = finished->epoch;
+
+	if (exec_local_req(&req, &sys->this_node) == SD_RES_SUCCESS)
+		return;
+
+	eprintf("failed to notify recovery completion, %d\n",
+		finished->epoch);
+}
+
+/*
+ * Done callback that finish_recovery() pairs with
+ * notify_recovery_completion_work(): frees the recovery_work in which
+ * @work is embedded.
+ */
+static void notify_recovery_completion_main(struct work *work)
+{
+	free_recovery_work(container_of(work, struct recovery_work, work));
+}
+
static inline void finish_recovery(struct recovery_work *rw)
{
recovering_work = NULL;
@@ -345,7 +370,10 @@ static inline void finish_recovery(struct recovery_work *rw)
if (sd_store->end_recover)
sd_store->end_recover(sys->epoch - 1, rw->old_vinfo);
- free_recovery_work(rw);
+ /* notify recovery completion to other nodes */
+ rw->work.fn = notify_recovery_completion_work;
+ rw->work.done = notify_recovery_completion_main;
+ queue_work(sys->recovery_wqueue, &rw->work);
dprintf("recovery complete: new epoch %"PRIu32"\n",
sys->recovered_epoch);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 224be51..1bc7e60 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -154,7 +154,7 @@ struct store_driver {
int (*purge_obj)(void);
/* Operations for snapshot */
int (*snapshot)(struct siocb *);
- int (*cleanup)(struct siocb *);
+ int (*cleanup)(void);
int (*restore)(struct siocb *);
int (*get_snap_file)(struct siocb *);
};
@@ -166,7 +166,7 @@ int default_read(uint64_t oid, struct siocb *iocb);
int default_link(uint64_t oid, struct siocb *iocb, uint32_t tgt_epoch);
int default_atomic_put(uint64_t oid, struct siocb *iocb);
int default_end_recover(uint32_t old_epoch, struct vnode_info *old_vnode_info);
-int default_cleanup(struct siocb *iocb);
+int default_cleanup(void);
int default_format(char *name);
int default_remove_object(uint64_t oid);
int default_purge_obj(void);
--
1.7.2.5
More information about the sheepdog
mailing list