[sheepdog] [PATCH v3 4/4] farm: comment why we can't remove objlist entry while deleting object
levin li
levin108 at gmail.com
Thu Jul 19 04:19:11 CEST 2012
From: levin li <xingke.lwp at taobao.com>
Signed-off-by: levin li <xingke.lwp at taobao.com>
---
sheep/farm/trunk.c | 56 +++++++++++++++++++++++++++++++++-------------------
1 file changed, 36 insertions(+), 20 deletions(-)
diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index b45427d..6201fef 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -275,30 +275,46 @@ int trunk_file_write_recovery(unsigned char *outsha1)
list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
oid = entry->raw.oid;
- if (oid_stale(oid)) {
- dprintf("stale oid %"PRIx64"\n", oid);
- if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
- if (fill_entry_new_sha1(entry) < 0) {
- eprintf("fill sha1 fail\n");
- goto out;
- }
- }
-
- old_sha1 = omap_tree_insert(oid, entry->raw.sha1);
- if (old_sha1)
- sha1_file_try_delete(old_sha1);
-
- strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
- active_nr++;
+ if (!oid_stale(oid))
+ continue;
- snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid);
- if (unlink(p) < 0) {
- eprintf("%s:%m\n", p);
+ dprintf("stale oid %"PRIx64"\n", oid);
+ if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
+ if (fill_entry_new_sha1(entry) < 0) {
+ eprintf("fill sha1 fail\n");
goto out;
}
- dprintf("remove file %"PRIx64"\n", entry->raw.oid);
- put_entry(entry);
}
+
+ old_sha1 = omap_tree_insert(oid, entry->raw.sha1);
+ if (old_sha1)
+ sha1_file_try_delete(old_sha1);
+
+ strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
+ active_nr++;
+
+ /*
+ * We remove the object from the working directory, but we must
+ * not remove its objlist cache entry.
+ *
+ * Consider the following case:
+ *
+ * Node A finishes recovery before some other nodes and deletes
+ * the stale object from its farm working directory. If it also
+ * deleted the objlist entry, another node B might issue a
+ * get_obj_list() request after the entry is gone from the
+ * original node A but before it has been added on the target
+ * node C. Node B would then not find the objlist entry at all,
+ * so it would skip the object during recovery and the object
+ * would be lost.
+ */
+ snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid);
+ if (unlink(p) < 0) {
+ eprintf("%s:%m\n", p);
+ goto out;
+ }
+ dprintf("remove file %"PRIx64"\n", entry->raw.oid);
+ put_entry(entry);
}
h = (struct sha1_file_hdr*)buf.buf;
--
1.7.10
More information about the sheepdog
mailing list