[sheepdog] [PATCH v3 4/4] farm: comment why we can't remove objlist entry while deleting object

levin li levin108 at gmail.com
Thu Jul 19 04:19:11 CEST 2012


From: levin li <xingke.lwp at taobao.com>


Signed-off-by: levin li <xingke.lwp at taobao.com>
---
 sheep/farm/trunk.c |   56 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/sheep/farm/trunk.c b/sheep/farm/trunk.c
index b45427d..6201fef 100644
--- a/sheep/farm/trunk.c
+++ b/sheep/farm/trunk.c
@@ -275,30 +275,46 @@ int trunk_file_write_recovery(unsigned char *outsha1)
 
 	list_for_each_entry_safe(entry, t, &trunk_active_list, active_list) {
 		oid = entry->raw.oid;
-		if (oid_stale(oid)) {
-			dprintf("stale oid %"PRIx64"\n", oid);
-			if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
-				if (fill_entry_new_sha1(entry) < 0) {
-					eprintf("fill sha1 fail\n");
-					goto out;
-				}
-			}
-
-			old_sha1 = omap_tree_insert(oid, entry->raw.sha1);
-			if (old_sha1)
-				sha1_file_try_delete(old_sha1);
-
-			strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
-			active_nr++;
+		if (!oid_stale(oid))
+			continue;
 
-			snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid);
-			if (unlink(p) < 0) {
-				eprintf("%s:%m\n", p);
+		dprintf("stale oid %"PRIx64"\n", oid);
+		if (trunk_entry_no_sha1(entry) || trunk_entry_is_dirty(entry)) {
+			if (fill_entry_new_sha1(entry) < 0) {
+				eprintf("fill sha1 fail\n");
 				goto out;
 			}
-			dprintf("remove file %"PRIx64"\n", entry->raw.oid);
-			put_entry(entry);
 		}
+
+		old_sha1 = omap_tree_insert(oid, entry->raw.sha1);
+		if (old_sha1)
+			sha1_file_try_delete(old_sha1);
+
+		strbuf_add(&buf, &entry->raw, sizeof(struct trunk_entry));
+		active_nr++;
+
+		/*
+		 * We remove the object from the working directory, but must
+		 * not remove the objlist cache entry.
+		 *
+		 * Consider the following case:
+		 *
+		 * Node A finishes recovery before some other nodes and deletes
+		 * the stale object from the farm working directory. If it also
+		 * deleted the objlist entry, this could cause a problem:
+		 * suppose another node B issues a get_obj_list() request after
+		 * the objlist entry is deleted on the original node A but
+		 * before it is added on the target node C. Node B would then
+		 * not find the objlist entry, so it would skip this object
+		 * during recovery, and the object would be lost.
+		 */
+		snprintf(p, sizeof(p), "%s%016"PRIx64, obj_path, entry->raw.oid);
+		if (unlink(p) < 0) {
+			eprintf("%s:%m\n", p);
+			goto out;
+		}
+		dprintf("remove file %"PRIx64"\n", entry->raw.oid);
+		put_entry(entry);
 	}
 
 	h = (struct sha1_file_hdr*)buf.buf;
-- 
1.7.10




More information about the sheepdog mailing list