[sheepdog] [PATCH RFT 4/4] sheep: garbage collect needless VIDs

Hitoshi Mitake mitake.hitoshi at lab.ntt.co.jp
Mon Dec 15 10:37:00 CET 2014


Sheepdog currently never recycles VIDs. This causes problems such as
VID space exhaustion and the accumulation of garbage inode objects.

Deleted inode objects must be kept because living inodes (snapshots
or clones) can point to objects of the deleted inodes. Therefore, once
every member of a VDI family is deleted, it is safe to remove the
deleted inode objects.

Cc: Saeki Masaki <saeki.masaki at po.ntts.co.jp>
Cc: Yuka Kawasaki <kawasaki.yuka at po.ntts.co.jp>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
---
 sheep/ops.c        |  1 +
 sheep/sheep_priv.h |  1 +
 sheep/vdi.c        | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)

diff --git a/sheep/ops.c b/sheep/ops.c
index f87372d..643c4eb 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -198,6 +198,7 @@ static int post_cluster_del_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 	if (ret == SD_RES_SUCCESS) {
 		atomic_set_bit(vid, sys->vdi_deleted);
 		vdi_mark_deleted(vid);
+		run_vid_gc(vid);
 	}
 
 	if (!sys->enable_object_cache)
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index bc45f49..baaab57 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -361,6 +361,7 @@ void invalidate_other_nodes(uint32_t vid);
 int inode_coherence_update(uint32_t vid, bool validate,
 			   const struct node_id *sender);
 void remove_node_from_participants(const struct node_id *left);
+void run_vid_gc(uint32_t vid);
 
 extern int ec_max_data_strip;
 
diff --git a/sheep/vdi.c b/sheep/vdi.c
index a0cc0a7..b2cc1ff 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -149,6 +149,30 @@ ret:
 	sd_mutex_unlock(&vdi_family_mutex);
 }
 
+static main_fn struct vdi_family_member *lookup_root(struct vdi_family_member
+						     *member)
+{	/* Return the topmost ancestor of @member (the one with no parent). */
+	if (!member->parent)	/* no parent: @member itself is the root */
+		return member;
+
+	return lookup_root(member->parent);	/* climb one level and retry */
+}
+
+static main_fn bool is_all_members_deleted(struct vdi_family_member *member)
+{	/* True iff @member and all of its descendants are marked deleted. */
+	struct vdi_family_member *child;
+
+	if (!member->entry->deleted)	/* this member is still alive */
+		return false;
+
+	list_for_each_entry(child, &member->child_list_head, child_list_node) {
+		if (!is_all_members_deleted(child))	/* recurse into subtree */
+			return false;
+	}
+
+	return true;	/* no live member found at or below @member */
+}
+
 /*
  * ec_max_data_strip represent max number of data strips in the cluster. When
  * nr_zones < it, we don't purge the stale objects because for erasure coding,
@@ -2035,3 +2059,58 @@ main_fn void free_vdi_state_snapshot(int epoch)
 
 	panic("invalid free request for vdi state snapshot, epoch: %d", epoch);
 }
+
+static main_fn void do_vid_gc(struct vdi_family_member *member)
+{	/* Release @member and its whole subtree: entries, nodes, inode objects. */
+	struct vdi_state_entry *entry = member->entry;
+	uint32_t vid = entry->vid;	/* cached: entry is freed below */
+	uint64_t oid = vid_to_vdi_oid(vid);
+	struct vdi_family_member *child;
+
+	rb_erase(&entry->node, &vdi_state_root);	/* unlink before freeing */
+	free(entry);
+
+	list_for_each_entry(child, &member->child_list_head, child_list_node) {
+		list_del(&child->child_list_node); /* NOTE(review): removed mid-walk */
+		do_vid_gc(child); /* ...and freed here; confirm the iterator allows it */
+	}
+
+	free(member);
+
+	if (sd_store && sd_store->exist(oid, -1)) /* skip if no store or no object */
+		/* TODO: gc other objects */
+		sd_store->remove_object(oid, -1);
+
+	atomic_clear_bit(vid, sys->vdi_inuse);	/* presumably frees the VID for reuse */
+	atomic_clear_bit(vid, sys->vdi_deleted);
+}
+
+main_fn void run_vid_gc(uint32_t vid)
+{	/* GC the whole VDI family of @vid if every member is marked deleted. */
+	struct vdi_state_entry *entry;
+	struct vdi_family_member *member, *root;
+
+	sd_write_lock(&vdi_state_lock);	/* both locks are dropped at "out" */
+	sd_mutex_lock(&vdi_family_mutex);
+	entry = vdi_state_search(&vdi_state_root, vid);
+	if (!entry) {	/* no state entry for this vid: log and bail out */
+		sd_alert("vid %"PRIx32" doesn't have its entry", vid);
+		goto out;
+	}
+
+	member = entry->family_member;	/* NOTE(review): unchecked; never NULL? */
+	root = lookup_root(member);
+
+	if (is_all_members_deleted(root)) {	/* checked from the root downward */
+		sd_info("all members of the family (root: %"PRIx32
+			") are deleted", root->vid);
+		do_vid_gc(root);	/* releases root and its whole subtree */
+	} else
+		sd_info("not all members of the family (root: %"PRIx32
+			") are deleted", root->vid);
+
+out:
+	sd_mutex_unlock(&vdi_family_mutex);	/* reverse of acquisition order */
+	sd_rw_unlock(&vdi_state_lock);
+
+}
-- 
1.8.3.2




More information about the sheepdog mailing list